home *** CD-ROM | disk | FTP | other *** search
Text File | 1998-05-21 | 106.2 KB | 3,506 lines |
- ;; TREX: Tools for Regluar EXpressions
- ;;
- ;; Regular Expression Compiler
- ;;
- ;; Coded by S.Tomura <tomura@etl.go.jp>
-
- ;; Copyright (C) 1992 Free Software Foundation, Inc.
-
- ;; This file is part of XEmacs.
- ;; This file contains Japanese characters
-
- ;; XEmacs is free software; you can redistribute it and/or modify it
- ;; under the terms of the GNU General Public License as published by
- ;; the Free Software Foundation; either version 2, or (at your option)
- ;; any later version.
-
- ;; XEmacs is distributed in the hope that it will be useful, but
- ;; WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;; General Public License for more details.
-
- ;; You should have received a copy of the GNU General Public License
- ;; along with XEmacs; see the file COPYING. If not, write to the
- ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- ;; Boston, MA 02111-1307, USA.
-
- (defvar TREX-version "0.41")
- ;;; Last modified date: Thu Jun 15 13:07:39 1995
-
- ;;; 95.6.15 modified by S.Tomura <tomura@etl.go.jp>
- ;;;
- ;;; $BFbB"$N(Bre_compile_pattern $B$HF1MM$K(B case-fold-search $B$K$h$C$F!"(B
- ;;; translate $B$9$k$h$&$KJQ99$7$?!#(B
- ;;;
- ;;; 95.6.14 modified by S.Tomura <tomura@etl.go.jp>
- ;;; print-translate $B$rDI2C!#(B<0.38>
- ;;; print-fastmap $B$rDI2C!#(B
- ;;;
- ;;; start_memory, end_memory $B$NBh(B2$B0z?t$r@8@.$9$k$?$a$K!"(B:mark $B$NFbIt9=(B
- ;;; $BB$$rJQ99$7$?!#(B
- ;;;
- ;;; re-compile-and-dump, regexp-compile-and-dump $B$rDI2C!#(B
- ;;;
- ;;; 95.6.13
- ;;; regexp19.c $B$KBP1~$7$F(B start_memory, end_memory $B$N(B dump $B%k!<%A%s$r=$@5(B
- ;;;
- ;;; $B$9$Y$-$3$H!'(B
- ;;;
- ;;; (1) \(\)*
- ;;; (2) $B;^$N=gHV(B
- ;;; (3) $B0UL#$N$J$$%0%k!<%W;2>H$N8!=P(B "\(a\\)\\2"$B$J$I(B
-
- (defmacro TREX-inc (symbol &optional delta)
- (list 'setq symbol (if delta (list '+ symbol delta)
- (list '1+ symbol))))
-
- (defmacro TREX-dec (symbol &optional delta)
- (list 'setq symbol (if delta (list '- symbol delta)
- (list '1- symbol))))
-
- (defmacro num (sym)
- (list 'num* (list 'quote sym)))
-
- (defun num* (sym)
- (TREX-read-hexa (substring (symbol-name sym) 2)))
-
- (defun TREX-read-hexa (str)
- (let ((result 0) (i 0) (max (length str)))
- (while (< i max)
- (let ((ch (aref str i)))
- (cond((and (<= ?0 ch) (<= ch ?9))
- (setq result (+ (* result 16) (- ch ?0))))
- ((and (<= ?a ch) (<= ch ?f))
- (setq result (+ (* result 16) (+ (- ch ?a) 10))))
- ((and (<= ?A ch) (<= ch ?F))
- (setq result (+ (* result 16) (+ (- ch ?A) 10)))))
- (TREX-inc i)))
- result))
-
- ;;; 1 bytes : 0x00 <= C11 <= 0x7F
- ;;; n bytes : 0x80 == LCCMP
- ;;; 2 bytes 0xA0 <= LC <= 0xAF
- ;;; 3 bytes 0xB0 <= LC <= 0xBB
- ;;; 4 bytes 0xBC <= LC <= 0xBE
- ;;; 2 bytes : 0x81 <= LC <= 0x8F
- ;;; 3 bytes : 0x90 <= LC <= 0x9B
- ;;; 4 bytes : 0x9C <= LC <= 0x9E
-
-
- (defun TREX-char-octets (str index)
- (let ((max (length str)))
- (if (or (< index 0) (<= max index)) 0
- (let ((ch (aref str index))
- (bytes))
- (setq bytes
- (cond ((<= ch (num 0x7f)) 1)
- ((= ch (num 0x80))
- (let ((max (length str))
- (i index))
- (while (and (< i max)
- (<= (num 0xa0) (aref str i))
- (<= (aref str i) (num 0xbe)))
- (setq ch (aref str i))
- (cond ((<= ch (num 0xaf)) (TREX-inc i 2))
- ((<= ch (num 0xbb)) (TREX-inc i 3))
- ((<= ch (num 0xbe)) (TREX-inc i 4))))
- (- i index)))
- ((<= ch (num 0x8f)) 2)
- ((<= ch (num 0x9b)) 3)
- ((<= ch (num 0x9e)) 4)
- (t 1)))
- (if (<= (+ index bytes) max) bytes 1)))))
-
- (defun TREX-comp-charp (str index)
- (= (aref str index) (num 0x80)))
-
- ;;; 0x00 <= C11 <= 0x7F : 1 bytes
- ;;; Type 1-1 C11
- ;;; 0x80 == LCCMP : n bytes
- ;;; Type N LCCMP LCN1 C11 ... LCN2 C21 ... LCNn Cn1 ...
- ;;; 0xA0 <= LCN* <= 0xBE
- ;;; LCN* = LC + 0x20
- ;;; LCN* = 0xA0 (ASCII)
- ;;; 0x81 <= LC1 <= 0x8F : 2 bytes
- ;;; Type 1-2 LC1 C11 :
- ;;; 0xA0 <= C11 <= 0xFF
- ;;; 0x90 <= LC2 <= 0x99 : 3 bytes
- ;;; Type 2-3 LC2 C21 C22
- ;;; 0xA0 <= C21 <= 0xFF
- ;;; 0xA0 <= C22 <= 0xFF
- ;;; 0x9A == LCPRV1 : 3 bytes
- ;;; Type 1-3 LCPRV1 LC12 C11
- ;;; 0xA0 <= LC12 <= 0xB7
- ;;; 0xA0 <= C11 <= 0xFF
- ;;; 0x9B == LCPRV1 : 3 bytes
- ;;; Type 1-3 LCPRV1 LC12 C11
- ;;; 0xB8 <= LC12 <= 0xBF
- ;;; 0xA0 <= C11 <= 0xFF
- ;;; 0x9C == LCPRV2 : 4 bytes
- ;;; Type 2-4 LCPRV2 LC22 C21 C22
- ;;; 0xC0 <= LC22 <= 0xC7
- ;;; 0xA0 <= C21 <= 0xFF
- ;;; 0xA0 <= C22 <= 0xFF
- ;;; 0x9D == LCPRV2 : 4 bytes
- ;;; Type 2-4 LCPRV2 LC22 C21 C22
- ;;; 0xC8 <= LC22 <= 0xDF
- ;;; 0xA0 <= C21 <= 0xFF
- ;;; 0xA0 <= C22 <= 0xFF
- ;;; 0x9E == LCPRV3 : 4 bytes
- ;;; Type 3-4 LCPRV3 C31 C32 C33
- ;;; 0xA0 <= C31 <= 0xBF
- ;;; 0xA0 <= C32 <= 0xFF
- ;;; 0xA0 <= C33 <= 0xFF
- ;;; char = [0x00-0x7f]\|
- ;;; 0x80
- ;;; \(0xa0[0xa0-0xff]\|
- ;;; [0xa1-0xaf][0xa0-0xff]\|
- ;;; [0xb0-0xb9][0xa0-0xff][0xa0-0xff]\|
- ;;; 0xba[0xa0-0xb7][0xa0-0xff]\|
- ;;; 0xbb[0xb8-0xbf][0xa0-0xff]\|
- ;;; 0xbc[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\|
- ;;; 0xbd[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\|
- ;;; 0xbe[0xa0-0xbf][0xa0-0xff][0xa0-0xff]
- ;;; \)*\|
- ;;; [0x81-0x8f][0xa0-0xff]\|
- ;;; [0x90-0x99][0xa0-0xff][0xa0-0xff]\|
- ;;; 0x9a[0xa0-0xb7][0xa0-0xff]\|
- ;;; 0x9b[0xb8-0xbf][0xa0-0xff]\|
- ;;; 0x9c[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\|
- ;;; 0x9d[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\|
- ;;; 0x9e[0xa0-0xbf][0xa0-0xff][0xa0-0xff]
-
- (defun regexp-make-or (&rest body)
- (cons ':or body))
-
- (defun regexp-make-seq (&rest body)
- (cons ':seq body))
-
- (defun regexp-make-star (regexp)
- (list ':star regexp))
-
- (defun regexp-make-range (from to)
- (list 'CHARSET (list ':range from to)))
-
-
- (defvar regexp-allchar-regexp
- (regexp-make-or
- (regexp-make-range 0 (num 0x7f))
- (regexp-make-seq
- (num 0x80)
- (regexp-make-star
- (regexp-make-or
- (regexp-make-seq
- (num 0xa0)
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (regexp-make-range (num 0xa1) (num 0xaf))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (regexp-make-range (num 0xb0) (num 0xb9))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0xba)
- (regexp-make-range (num 0xa0) (num 0xb7))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0xbb)
- (regexp-make-range (num 0xb8) (num 0xbf))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0xbc)
- (regexp-make-range (num 0xc0) (num 0xc7))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0xbd)
- (regexp-make-range (num 0xc8) (num 0xdf))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0xbe)
- (regexp-make-range (num 0xa0) (num 0xbf))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff))))))
- (regexp-make-seq
- (regexp-make-range (num 0x81) (num 0x8f))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (regexp-make-range (num 0x90) (num 0x99))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0x9a)
- (regexp-make-range (num 0xa0) (num 0xb7))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0x9b)
- (regexp-make-range (num 0xb8) (num 0xbf))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0x9c)
- (regexp-make-range (num 0xc0) (num 0xc7))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0x9d)
- (regexp-make-range (num 0xc8) (num 0xdf))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))
- (regexp-make-seq
- (num 0x9e)
- (regexp-make-range (num 0xa0) (num 0xbf))
- (regexp-make-range (num 0xa0) (num 0xff))
- (regexp-make-range (num 0xa0) (num 0xff)))))
-
- ;;;;
- ;;;;
- ;;;;
-
- (defun TREX-string-reverse (str)
- (if (<= (length str) 1) str
- (let ((result (make-string (length str) 0))
- (i 0)
- (j (1- (length str))))
- (while (<= 0 j)
- (aset result i (aref str j))
- (TREX-inc i)
- (TREX-dec j))
- result)))
-
- (defun TREX-string-forward-anychar (str start)
- (and (stringp str) (numberp start)
- (let ((max (length str)))
- (and (<= 0 start)
- (< start max)
- (+ start (TREX-char-octets str start))))))
-
- (defmacro TREX-init (symbol value)
- (` (if (null (, symbol))
- (setq (, symbol) (, value)))))
-
- (defmacro TREX-push (val symbol)
- (list 'setq symbol (list 'cons val symbol)))
-
- (defun TREX-member (elm list pred)
- (while (and list (not (funcall pred elm (car list))))
- (setq list (cdr list)))
- list)
-
- (defun TREX-memequal (elm list)
- (while (and list (not (equal elm (car list))))
- (setq list (cdr list)))
- list)
-
- (defun TREX-find (elm list)
- (let ((pos 0))
- (while (and list (not (equal elm (car list))))
- (setq list (cdr list))
- (TREX-inc pos))
- (if list pos
- nil)))
-
- (defun TREX-find-if (pred list)
- (let ((pos 0))
- (while (and list (not (funcall pred (car list))))
- (TREX-inc pos)
- (setq list (cdr list)))
- (if list pos
- nil)))
-
- (defun TREX-firstn (list n)
- (if (or (<= n 0) (null list)) nil
- (cons (car list) (TREX-firstn (cdr list) (1- n)))))
-
- (defun TREX-delete-duplicate (list)
- (let ((result nil))
- (while list
- (let ((elm (car list)))
- (if (not (TREX-memequal elm result))
- (TREX-push elm result)))
- (setq list (cdr list)))
- (nreverse result)))
-
- (defun TREX-delete (elm list)
- (let ((result nil))
- (while list
- (if (not (equal elm (car list)))
- (TREX-push (car list) result))
- (setq list (cdr list)))
- (nreverse result)))
-
- (defun TREX-string-to-list (str)
- (let ((result nil)
- (i 0)
- (max (length str)))
- (while (< i max)
- (TREX-push (aref str i) result)
- (TREX-inc i))
- (nreverse result)))
-
- (defun TREX-sort (list lessp &optional key)
- (if (null key)
- (sort list lessp)
- (sort list (function (lambda (x y) (funcall lessp (funcall key x) (funcall key y)))))))
-
- (defun TREX-key-lessp (x y)
- (cond((symbolp x)
- (cond ((symbolp y)
- (string-lessp x y))
- (t;; (not (symbolp))
- t)))
- ((numberp x)
- (cond ((numberp y)
- (< x y))
- ((and (consp y) (eq (car y) ':range))
- (< x (nth 1 y)))
- (t nil)))
- ((and (consp x) (eq (car x) ':range))
- (cond ((and (consp y) (eq (car y) ':range))
- (< (nth 2 x) (nth 1 y)))
- ((numberp y)
- (< (nth 2 x) y))
- (t nil)))
- (t nil)))
-
- (defun TREX-lessp-car (x y)
- (let ((x (car x))
- (y (car y)))
- (TREX-key-lessp x y)))
-
- (defmacro TREX-define-enum (&rest list)
- (list 'TREX-define-enum* (list 'quote list)))
-
- (defun TREX-define-enum* (list)
- (let ((i 0))
- (while list
- (set (car list) i)
- (TREX-inc i)
- (setq list (cdr list)))))
-
- ;;;
- ;;; regexp-parse
- ;;;
-
- ;;;
- ;;; $B@55,I=8=(B(regular expression)
- ;;;
- ;;; . single character except a newline
- ;;; REG* more than zero
- ;;; REG+ at least once
- ;;; REG? once or not at all
- ;;; [...] character set
- ;;; [^...] character not set
- ;;; ^ beginning of line
- ;;; $ end of line
- ;;; \ quote
- ;;; \| alternative
- ;;; \( ... \) group and mark
- ;;; \DIGIT
- ;;; \` beginning of buffer
- ;;; \' end of buffer
- ;;; \b beginning of word or end of word
- ;;; \B not \b
- ;;; \< beginning of word
- ;;; \> end of word
- ;;;
- ;;; \w word-constituent character
- ;;; \W not \w
- ;;; \sCODE syntax CODE character
- ;;; \SCODE not \sCODE
-
- ;;;
- ;;; REG0 ::= REG1 |
- ;;; REG1 "\\|" REG0
- ;;;
- ;;; REG1 ::= REG2 |
- ;;; REG2 REG1
- ;;;
- ;;; REG2 ::= REG3 |
- ;;; REG2 "*" |
- ;;; REG2 "+" |
- ;;; REG2 "?" |
- ;;;
- ;;; REG3 ::= "." |
- ;;; "[" ... "]" |
- ;;; "[" "^" ... "]" |
- ;;; "^" |
- ;;; "$" |
- ;;; "\\" DIGIT |
- ;;; "\\(" REG0 "\\)"
-
- ;;; $B>H9g$O@55,I=8=$N:8$+$i1&$X9T$o$l$k!%(B
-
- (defvar *regexp-parse-translate* nil
- "$B@55,I=8=$rFI$_9~$_Cf$K;HMQ$9$k(B translate table.\n
- case-fold-search $B$NCM$K$h$C$F(B downcasetable $B$r@_Dj$9$k!#(B")
-
- (defun regexp-parse-translate-char-string (str)
- (if (and *regexp-parse-translate*
- (= (length str) 1))
- ;;; $BK\Ev$O(B destructive $B$G$b(B OK
- (char-to-string (aref *regexp-parse-translate* (aref str 0)))
- str))
-
- (defvar *regexp-word-definition* nil)
-
- (defvar *regexp-parse-index* nil)
- (defvar *regexp-parse-end* nil)
- (defvar *regexp-parse-str* nil)
- (defvar *regexp-parse-regno* 1)
-
- (defun regexp-error (&optional reason)
- (if (null reason) (setq reason "Bad regexp"))
- (error "Regexp-parse::%s \"%s\" * \"%s\"" reason (substring *regexp-parse-str* 0 *regexp-parse-index*)
- (substring *regexp-parse-str* *regexp-parse-index*)))
-
- (defun word-parse (pattern)
- (let ((*regexp-word-definition* t))
- (regexp-parse pattern)))
-
- (defun regexp-parse (pattern)
- (let*((*regexp-parse-str* pattern)
- (*regexp-parse-index* 0)
- (*regexp-parse-end* (length pattern))
- (*regexp-parse-regno* 1)
- (result (regexp-parse-0)))
- (if (<= *regexp-parse-end* *regexp-parse-index*)
- result
- (regexp-error))))
-
- (defun regexp-parse-0 ()
- (let* ((result (regexp-parse-1)))
- (cond((<= *regexp-parse-end* *regexp-parse-index*)
- result)
- ((and (< (1+ *regexp-parse-index*) *regexp-parse-end*)
- (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\)
- (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?|))
- (TREX-inc *regexp-parse-index* 2)
- (list ':or result (regexp-parse-0)))
- (t result))))
-
- (defun regexp-parse-1 ()
- (let ((results nil)
- (result2 nil))
- (while (setq result2 (regexp-parse-2))
- (TREX-push result2 results))
- (if results
- (if (cdr results)
- (cons ':seq (nreverse results))
- (car results))
- nil)))
-
- (defun regexp-parse-2 ()
- (let ((result (regexp-parse-3)))
- (while (and (< *regexp-parse-index* *regexp-parse-end*)
- (TREX-memequal (aref *regexp-parse-str* *regexp-parse-index*)
- '(?* ?+ ??)))
- (let ((ch (aref *regexp-parse-str* *regexp-parse-index*)))
- (TREX-inc *regexp-parse-index*)
- (setq result
- (cond((= ch ?*) (list ':star result))
- ((= ch ?+) (list ':plus result))
- ((= ch ??) (list ':optional result))))))
- result))
-
- (defun regexp-parse-3 ()
- (if (<= *regexp-parse-end* *regexp-parse-index*)
- nil
- (let* ((start *regexp-parse-index*)
- (i *regexp-parse-index*)
- (end *regexp-parse-end*)
- (ch (aref *regexp-parse-str* i)))
- (TREX-inc *regexp-parse-index*)
- (cond ((= ch ?.) '(ANYCHAR))
- ((= ch ?^) '(BEGLINE))
- ((= ch ?$) '(ENDLINE))
- ((= ch ?\[)
- (regexp-parse-charset))
- ((= ch ?\])
- (setq *regexp-parse-index* start)
- nil)
- ((= ch ?*)
- (setq *regexp-parse-index* start)
- nil)
- ((= ch ?+)
- (setq *regexp-parse-index* start)
- nil)
- ((= ch ??)
- (setq *regexp-parse-index* start)
- nil)
- ((and (= ch ?\\) (< (1+ i) end))
- (setq ch (aref *regexp-parse-str* (1+ i)))
- (TREX-inc i)
- (TREX-inc *regexp-parse-index*)
- (cond ((= ch ?| )
- (setq *regexp-parse-index* start)
- nil)
- ((= ch ?\( )
- (if (< 9 *regexp-parse-regno*)
- (regexp-error "Too many parenth"))
- (let ((regexp-parse-regno *regexp-parse-regno*))
- (TREX-inc *regexp-parse-regno*)
- (let ((result (regexp-parse-0)))
-
- (cond((and (< (1+ *regexp-parse-index*) *regexp-parse-end*)
- (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\ )
- (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?\) ))
- (TREX-inc *regexp-parse-index* 2)
- (if *regexp-word-definition*
- result
- (list ':mark regexp-parse-regno
- (- *regexp-parse-regno* regexp-parse-regno 1)
- result)))
- (t
- (regexp-error))))))
- ((= ch ?\) )
- (setq *regexp-parse-index* start)
- nil)
- ((= ch ?` ) '(BEGBUF))
- ((= ch ?' ) '(ENDBUF))
- ((= ch ?b )
- (if *regexp-word-definition* (regexp-error) '(WORDBOUND)))
- ((= ch ?B )
- (if *regexp-word-definition* (regexp-error) '(NOTWORDBOUND)))
- ((= ch ?< )
- (if *regexp-word-definition* (regexp-error) '(WORDBEG)))
- ((= ch ?> )
- (if *regexp-word-definition* (regexp-error) '(WORDEND)))
- ((= ch ?w ) (list 'SYNTAXSPEC
- (syntax-spec-code ?w))) ;;;WORDCHAR
- ((= ch ?W ) (list 'NOTSYNTAXSPEC
- (syntax-spec-code ?w))) ;;;NOTWORDCHAR
- ;;; ((= ch ?=) 'AT_DOT)
- ((and (<= ?1 ch)
- (<= ch ?9))
- (if *regexp-word-definition*
- (regexp-error) (list 'DUPLICATE (- ch ?0))))
- ((= ch ?0)
- (regexp-error))
- ((and (= ch ?s )
- (< (1+ i) end))
- (TREX-inc *regexp-parse-index*)
- (list 'SYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i)))))
- ((and (= ch ?S )
- (< (1+ i) end))
- (TREX-inc *regexp-parse-index*)
- (list 'NOTSYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i)))))
- ((and (= ch ?c )
- (< (1+ i) end))
- (TREX-inc *regexp-parse-index*)
- (list 'CATEGORYSPEC (aref *regexp-parse-str* (1+ i))))
- ((and (= ch ?C )
- (< (1+ i) end))
- (TREX-inc *regexp-parse-index*)
- (list 'NOTCATEGORYSPEC (aref *regexp-parse-str* (1+ i))))
- (t
- (regexp-parse-translate-char-string
- (substring *regexp-parse-str* (1+ i) (+ i 2))))))
- (t
- (let ((nextpos (TREX-string-forward-anychar *regexp-parse-str* i)))
- (cond(nextpos
- (setq *regexp-parse-index* nextpos)
- (regexp-parse-translate-char-string
- (substring *regexp-parse-str* i nextpos)))
- (t (regexp-error)))))))))
-
- (defun regexp-parse-charset ()
- (if (< *regexp-parse-index* *regexp-parse-end*)
- (cond((eq (aref *regexp-parse-str* *regexp-parse-index*) ?^)
- (TREX-inc *regexp-parse-index*)
- (regexp-parse-charset0 'CHARSET_NOT nil))
- (t (regexp-parse-charset0 'CHARSET ;; ':or
- nil)))
- (regexp-error)))
-
- (defun regexp-parse-charset0 (op list)
- (if (< *regexp-parse-index* *regexp-parse-end*)
- (cond ((eq (aref *regexp-parse-str* *regexp-parse-index*) ?\])
- (TREX-inc *regexp-parse-index*)
- (regexp-parse-charset1 op '("\]")))
- (t
- (regexp-parse-charset1 op nil)))
- (regexp-error)))
-
- (defun regexp-parse-charset1 (op list)
- (if (< *regexp-parse-index* *regexp-parse-end*)
- (let* ((pos0 *regexp-parse-index*)
- (pos1 (TREX-string-forward-anychar *regexp-parse-str* pos0))
- (pos2 (TREX-string-forward-anychar *regexp-parse-str* pos1))
- (pos3 (TREX-string-forward-anychar *regexp-parse-str* pos2)))
- (if pos0
- ;;; ]
- (cond((eq (aref *regexp-parse-str* pos0) ?\])
- (setq *regexp-parse-index* pos1)
- ;;; returns charset form
- (cons op (sort (nreverse list) 'TREX-charset-lessp)))
- ;;; [^]] - [^]]
- ((and pos1 pos2 pos3
- (eq (aref *regexp-parse-str* pos1) ?-)
- (not (eq (aref *regexp-parse-str* pos2) ?\])))
- (let ((from (substring *regexp-parse-str* pos0 pos1))
- (to (substring *regexp-parse-str* pos2 pos3)))
- (if (and (= (length from) (length to))
- (not (TREX-comp-charp from 0))
- (not (TREX-comp-charp to 0))
- (or (= (length from) 1)
- (= (aref from 0) (aref to 0)))
- (or (string-equal from to) ;;; by Enami 93.08.08
- (string-lessp from to)))
- (if (string-equal from to)
- (TREX-push from list)
- (TREX-push (list ':range from to) list))
- (regexp-error)))
- (setq *regexp-parse-index* pos3)
- (regexp-parse-charset1 op list))
- ;;; [^]] - ] ;;; by Enami 93.08.08
- ((and pos1 pos2
- (eq (aref *regexp-parse-str* pos1) ?-)
- (eq (aref *regexp-parse-str* pos2) ?\]))
- (TREX-push (substring *regexp-parse-str* pos0 pos1) list)
- (TREX-push (substring *regexp-parse-str* pos1 pos2) list)
- (setq *regexp-parse-index* pos2)
- (regexp-parse-charset1 op list))
- (t
- (TREX-push (substring *regexp-parse-str* pos0 pos1) list)
- (setq *regexp-parse-index* pos1)
- (regexp-parse-charset1 op list)))
- (regexp-error)))
- (regexp-error)))
-
- (defun TREX-charset-lessp (ch1 ch2)
- (cond((and (stringp ch1) (stringp ch2))
- (string-lessp ch1 ch2))
- ((and (consp ch1) (consp ch2))
- (string-lessp (nth 2 ch1) (nth 1 ch2)))
- ((consp ch1)
- (string-lessp (nth 2 ch1) ch2))
- ((consp ch2)
- (string-lessp ch1 (nth 1 ch2)))))
-
- ;;;
- ;;; define-regexp
- ;;;
-
- (defmacro define-regexp (name &rest forms)
- (` (define-regexp* '(, name) '(, forms))))
-
- (defun define-regexp* (name forms)
- (put name ':regexp-has-definition t)
- (put name ':regexp-definition
- (if (= (length forms) 1)
- (nth 0 forms)
- (` (:seq (,@ forms))))))
-
- (defun regexp-get-definition (name)
- (get name ':regexp-definition))
-
- (defun regexp-define-specials (names)
- (mapcar (function (lambda (name)
- (put name ':regexp-special t)))
- names))
-
- (defun regexp-has-definition (name)
- (get name ':regexp-has-definition))
-
- (defun regexp-specialp (name)
- (get name ':regexp-special))
-
- (defun regexp-expand-definition (regexp &optional callers)
- (cond
- ((consp regexp)
- (let ((op (car regexp)))
- (cond((eq op ':mark)
- (` (:mark (, (nth 1 regexp))
- (, (nth 2 regexp))
- (, (regexp-expand-definition (nth 3 regexp))))))
- ((eq op ':or)
- (` (:or (,@ (mapcar 'regexp-expand-definition (cdr regexp))))))
- ((eq op ':seq)
- (` (:seq (,@ (mapcar 'regexp-expand-definition (cdr regexp))))))
- ((eq op ':optional)
- (` (:optional (, (regexp-expand-definition (nth 1 regexp))))))
- ((eq op ':star)
- (` (:star (, (regexp-expand-definition (nth 1 regexp))))))
- ((eq op ':plus)
- (` (:plus (, (regexp-expand-definition (nth 1 regexp))))))
- ;;;;****
- ((eq op ':range)
- regexp)
- ((regexp-specialp op)
- regexp)
- ((memq op callers)
- (error "regexp defs(%s)" op))
- ((regexp-has-definition op)
- (regexp-expand-definition (regexp-get-definition op)
- (cons op callers)))
- (t
- (error "undefined regexp(%s)" op)))))
- ((stringp regexp)
- regexp)
- ((null regexp)
- regexp)
- (t
- regexp)))
-
- ;;;
- ;;; regexp-*-lessp
- ;;; $B@55,7A<0$NA4=g=x$rDj5A$9$k!%(B
- ;;;
-
- ;;; nil < number < string < symbol < cons
-
- (defun regexp-lessp (exp1 exp2)
- (cond((equal exp1 exp2)
- nil)
- ((null exp1) t)
- ((numberp exp1)
- (cond((null exp2) nil)
- ((numberp exp2)
- (< exp1 exp2))
- (t t)))
- ((stringp exp1)
- (cond((or (null exp2)
- (numberp exp2))
- nil)
- ((stringp exp2)
- (string< exp1 exp2))
- (t t)))
- ((symbolp exp1)
- (cond((or (null exp2)
- (numberp exp2)
- (stringp exp2))
- nil)
- ((symbolp exp2)
- (string< exp1 exp2))
- (t t)))
- ((consp exp1)
- (cond ((not (consp exp2))
- nil)
- ((< (length exp1) (length exp2))
- t)
- ((= (length exp1) (length exp2))
- (regexp-lessp-list exp1 exp2))
- (t nil)))))
-
- (defun regexp-lessp-list (exp1 exp2)
- (cond((null exp1) nil)
- ((regexp-lessp (car exp1) (car exp2))
- t)
- ((equal (car exp1) (car exp2))
- (regexp-lessp-list (cdr exp1) (cdr exp2)))
- (t nil)))
-
- ;;;
- ;;; item = list of seq-body(== list of regexp)
- ;;; nil < cons
- ;;;
-
- (defun regexp-item-lessp (item1 item2)
- (cond((equal item1 item2)
- nil)
- ((null item2) t)
- ((consp item1)
- (cond((consp item2)
- (cond ((regexp-key-lessp (car item1) (car item2))
- t)
- ((equal (car item1) (car item2))
- (regexp-item-lessp (cdr item1) (cdr item2)))
- (t nil)))
- (t nil)))))
-
-
- (defun regexp-key-lessp-list (sym1 sym2 list)
- (< (TREX-find sym1 list) (TREX-find sym2 list)))
-
- (defun regexp-key-lessp (key1 key2)
- (cond ((regexp-key-class0 key1)
- (cond((regexp-key-class0 key2)
- (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class0*))
- (t t)))
- ((regexp-key-class1 key1)
- (cond((regexp-key-class1 key2)
- (regexp-key-lessp-list key1 key2 *regexp-key-class1*))
- ((or (regexp-key-class2 key2)
- (regexp-key-class3 key2)
- (regexp-key-class4 key2)
- (null key2))
- t)))
- ((regexp-key-class2 key1)
- (cond((regexp-key-class2 key2)
- (regexp-key-lessp-list key1 key2 *regexp-key-class2*))
- ((or (regexp-key-class3 key2)
- (regexp-key-class4 key2)
- (null key2))
- t)))
- ((regexp-key-class3 key1)
- (cond((regexp-key-class3 key2)
- (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class3*))
- ((or (regexp-key-class4 key2)
- (null key2))
- t)))
- ((regexp-key-class4 key1)
- (or (null key2)
- (and (regexp-key-class4 key2) (< key1 key2))))
- (t nil)))
-
- (defun regexp-alist-lessp (pair1 pair2)
- (regexp-key-lessp (car pair1) (car pair2)))
-
- ;;;
- ;;;
- ;;;
-
- (defvar *regexp-key-class0* '(START_MEMORY STOP_MEMORY))
-
- (defvar *regexp-key-class1* '(BEGLINE ENDLINE
- ;;; BEFORE_DOT AT_DOT AFTER_DOT
- BEGBUF ENDBUF
- WORDBEG WORDEND
- WORDBOUND NOTWORDBOUND))
-
- (defvar *regexp-key-class2* '(ANYCHAR
- CHARSET
- CHARSET_NOT
- ;;;WORDCHAR NOTWORDCHAR
- ))
-
- (defvar *regexp-key-class3* '(DUPLICATE
- SYNTAXSPEC NOTSYNTAXSPEC
- CATEGORYSPEC NOTCATEGORYSPEC
- ))
-
- (regexp-define-specials *regexp-key-class0*)
- (regexp-define-specials *regexp-key-class1*)
- (regexp-define-specials *regexp-key-class2*)
- (regexp-define-specials *regexp-key-class3*)
-
- (defun regexp-key-class0 (key)
- (and (consp key) (TREX-memequal (car key) *regexp-key-class0*)))
-
- (defun regexp-key-class1 (key)
- (and (consp key)
- (TREX-memequal (car key) *regexp-key-class1*)))
-
- (defun regexp-key-class2 (key)
- (and (consp key) (TREX-memequal (car key) *regexp-key-class2*)))
-
- (defun regexp-key-class3 (key)
- (and (consp key)
- (TREX-memequal (car key) *regexp-key-class3*)))
-
- (defun regexp-key-class4 (key)
- (or (and (consp key) (eq (car key) ':range))
- (numberp key) (symbolp key)))
-
- (defun regexp-item-key-class0 (item)
- (regexp-key-class0 (car item)))
-
- (defun regexp-item-key-class1 (item)
- (regexp-key-class1 (car item)))
-
- (defun regexp-item-key-class2 (item)
- (regexp-key-class2 (car item)))
-
- (defun regexp-item-key-class3 (item)
- (regexp-key-class3 (car item)))
-
- (defun regexp-item-key-class4 (item)
- (regexp-key-class4 (car item)))
-
- ;;;
- ;;; regexp-sort
- ;;; $B@55,I=8=$NI8=`7A<0$r5a$a$k$?$a$K@0Ns$r9T$&!%(B
- ;;;
-
- (defvar *regexp-sort-flag* t)
- (defvar *regexp-debug* nil)
-
- (defun regexp-sort (list pred)
- (if *regexp-sort-flag*
- (progn
- (if *regexp-debug* (princ (format "(regexp-sort %s %s)\n" list pred)))
- (let ((result (sort list pred)))
- (if *regexp-debug* (princ (format "<== %s\n" result)))
- result))
- list))
-
- ;;;
- ;;; regexp-inverse
- ;;;
-
- (defun regexp-inverse (regexp)
- (if (consp regexp)
- (let ((op (car regexp)))
- (cond((eq op ':mark)
- (list ':mark (nth 1 regexp) (nth 2 regexp)
- (regexp-inverse (nth 3 regexp))))
- ((eq op 'DUPLICATE)
- regexp)
- ((eq op ':or)
- (cons ':or (mapcar 'regexp-inverse (cdr regexp))))
- ((eq op ':seq)
- (cons ':seq (nreverse (mapcar 'regexp-inverse (cdr regexp)))))
- ((eq op ':optional)
- (list ':optional (regexp-inverse (nth 1 regexp))))
- ((eq op ':star)
- (list ':star (regexp-inverse (nth 1 regexp))))
- ((eq op ':plus)
- (list ':plus (regexp-inverse (nth 1 regexp))))
- (t regexp)))
- (if (stringp regexp)
- (TREX-string-reverse regexp)
- regexp)))
-
- ;;;
- ;;; regexp-remove-infinite-loop
- ;;;
-
- (defun regexp-remove-infinite-loop (regexp)
- (cond((consp regexp)
- (let ((op (car regexp)))
- (cond((eq op ':mark)
- )
- ((eq op 'DUPLICATE)
- regexp)
- ((eq op ':or)
- )
- ((eq op ':seq)
- )
- ((eq op ':optional)
- )
- ((eq op ':star)
- )
- ((eq op ':plus)
- )
- (t regexp))))
- ((stringp regexp)
- )
- ((null regexp)
- )
- (t
- regexp)))
-
-
- ;;;
- ;;; regexp-reform
- ;;;
-
- (defvar *regexp-register-definitions* nil)
- (defvar *regexp-registers* nil)
-
- (defun regexp-reform-duplication (regexp)
- (let* ((*regexp-register-definitions* nil)
- (newregexp (regexp-reform-duplication-1 regexp)))
- (let ((*regexp-registers* nil))
- (regexp-reform-duplication-2 newregexp))))
-
- (defun regexp-reform-duplication-1 (regexp)
- (if (not (consp regexp)) regexp
- (let ((mop (car regexp)))
- (cond((eq mop ':or)
- (cons ':or (mapcar 'regexp-reform-duplication-1
- (cdr regexp))))
- ((eq mop ':seq)
- (cons ':seq (mapcar 'regexp-reform-duplication-1
- (cdr regexp))))
- ((TREX-memequal mop '(:star :plus :optional))
- (list mop (regexp-reform-duplication-1 (nth 1 regexp))))
- ((eq mop ':mark)
- (TREX-push (cdr regexp)
- *regexp-register-definitions*)
- (list 'DUPLICATE (nth 1 regexp)))
- (t regexp)))))
-
- (defun regexp-reform-duplication-2 (regexp)
- (if (not (consp regexp)) regexp
- (let ((mop (car regexp)))
- (cond((eq mop ':or)
- (let ((registers *regexp-registers*)
- (newregisters nil)
- (result nil)
- (or-body (cdr regexp)))
- (while or-body
- (setq *regexp-registers* registers)
- (TREX-push (regexp-reform-duplication-2 (car or-body)) result)
- (setq newregisters (TREX-delete-duplicate (append *regexp-registers* newregisters)))
- (setq or-body (cdr or-body)))
- (setq *regexp-registers* newregisters)
- (cons ':or (nreverse result))))
- ((eq mop ':seq)
- (cons ':seq (mapcar 'regexp-reform-duplication-2
- (cdr regexp))))
- ((TREX-memequal mop '(:star :plus :optional))
- (list mop (regexp-reform-duplication-2 (nth 1 regexp))))
- ((eq mop 'DUPLICATE)
- (let ((regno (nth 1 regexp)))
- (if (TREX-memequal regno *regexp-registers*)
- regexp
- (let ((def (assoc regno *regexp-register-definitions*)))
- (TREX-push regno *regexp-registers*)
- ;;; $BBg>fIW!)(B
- (if def
- (cons ':mark def)
- regexp)))))
- (t regexp)))))
-
- ;;;
- ;;; regexp-expand
- ;;;
-
- ;;;
- ;;; <ISLAND> ::= ( <ITEM> ...)
- ;;; <ITEM> ::= ( <SEQ-BODY> ... )
- ;;;
-
- (defun regexp-expand-regexp (regexp)
- ;;; returns island
- (if (consp regexp)
- (let ((mop (car regexp)))
- (cond
- ;;;((eq mop 'CHARSET)
- ;;; (regexp-expand-charset t (cdr regexp)))
- ;;;((eq mop 'CHARSET_NOT)
- ;;; (regexp-expand-charset nil (cdr regexp)))
- ((eq mop ':or)
- (regexp-expand-or (cdr regexp)))
- ((eq mop ':seq)
- (regexp-expand-seq (cdr regexp)))
- ((eq mop ':star)
- (let ((arg (nth 1 regexp)))
- (if arg
- (append (regexp-expand-seq (list arg regexp)) (list nil))
- (list nil))))
- ((eq mop ':plus)
- (let ((arg (nth 1 regexp)))
- (if arg
- (regexp-expand-seq (list arg (list ':star arg)))
- (list nil))))
- ((eq mop ':optional)
- (append (regexp-expand-regexp (nth 1 regexp)) (list nil)))
- ((eq mop ':mark)
- (let ((regno (nth 1 regexp))
- (groups (nth 2 regexp))
- (arg (nth 3 regexp)))
- (if arg
- (list (list (list 'START_MEMORY regno groups)
- arg
- (list 'STOP_MEMORY regno groups)))
- (list (list (list 'START_MEMORY regno groups)
- (list 'STOP_MEMORY regno groups))))))
- (t (list (list regexp)))))
- (cond((null regexp) (list nil))
- ((symbolp regexp) (list (list regexp)))
- ((numberp regexp) (list (list regexp)))
- ((stringp regexp)
- (let ((result nil))
- (let ((i 0) (max (length regexp)))
- (while (< i max)
- (TREX-push (aref regexp i) result)
- (TREX-inc i))
- (list (nreverse result)))))
- (t (list (list regexp))))))
-
- ;;;
- ;;; (CHARSET "abc" ... ) == (:or (:seq "a" "b" "c") .... )
- ;;;
- ;;; (:range "abc" "ade") == (:seq "a" (:range "bc" "de"))
- ;;; (:range "bc" "de" ) == (:or (:seq "b" (:range "c" 0xFF))
- ;;; (:seq (:range "b"+1 "d"-1) (:range 0xA0 0xFF))
- ;;; (:seq "d" (:range 0xA0 "e")))
- ;;;
-
- ;;; charset::
-
- (defun charset-member-elt (ch elt)
- (if (consp elt)
- (if (eq (nth 0 elt) ':range)
- (and (<= ch (nth 1 elt))
- (<= (nth 2 elt) ch))
- nil)
- (equal ch elt)))
-
- (defun charset-member-P (ch or-form)
- (let ((result) (l (cdr or-form)))
- (while (and l (null result))
- (if (charset-membership-elt ch (car l))
- (setq result t))
- (setq l (cdr l)))
- result))
-
- (defun charset-member-N (ch nor-form)
- (not (charset-member+ ch nor-form)))
-
- (defun charset-norp (form)
- (and (consp form) (eq (car form) 'CHARSET_NOT)))
-
- (defun charset-and (form1 form2)
- (if (charset-norp form1)
- (if (charset-norp form2)
- (cons ':or (charset-or-PP (cdr form1) (cdr form2)))
- (charset-and-PN form2 form1))
- (if (charset-norp form2)
- (charset-and-pn form1 form2)
- (charset-and-PP form1 form2))))
-
- (defun charset-or-PP (or-body1 or-body2)
- (append or-body1 or-body2))
-
-
-
-
- (defun regexp-charset-to-regexp (charsets)
- (cons ':or (mapcar 'regexp-charset-to-regexp* charsets)))
-
- (defun regexp-charset-to-regexp* (elm)
- (cond((consp elm) (regexp-charset-range-to-regexp (nth 1 elm) (nth 2 elm)))
- ((stringp elm) (cons ':seq (TREX-string-to-list elm)))
- (t elm)))
-
- (defun regexp-charset-range-to-regexp (str1 str2)
- (let ((result (regexp-charset-range-to-regexp* (TREX-string-to-list str1)
- (TREX-string-to-list str2))))
- (if (= (length result) 1) (car result) (cons ':seq result))))
-
-
- (defun regexp-charset-range-to-regexp* (nums1 nums2)
- (let ((len (length (cdr nums1)))
- (ch1 (car nums1))
- (ch2 (car nums2)))
- (if (= len 0)
- (if (= ch1 ch2) (list ch1)
- (list (regexp-charset-range-1 ch1 ch2)))
- (if (= ch1 ch2)
- (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (cdr nums2)))
- (let ((part1 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (make-list (length (cdr nums1)) 255))))
- (part2 (if (<= (1+ ch1) (1- ch2))
- (cons (regexp-charset-range-1 (1+ ch1) (1- ch2))
- (regexp-charset-range-to-regexp* (make-list len 160) (make-list len 255)))
- nil))
- (part3 (cons ch2 (regexp-charset-range-to-regexp* (make-list len 160) (cdr nums2)))))
- (if part2
- (list (list ':or (cons ':seq part1) (cons ':seq part2) (cons ':seq part3)))
- (list (list ':or (cons ':seq part1) (cons ':seq part3)))))))))
-
- (defun regexp-charset-range-1 (from to)
- (let ((result nil))
- (while (<= from to)
- (TREX-push to result)
- (TREX-dec to))
- (cons ':or result)))
-
- (defun regexp-charset-range-1* (from to)
- (if (not (<= from to)) nil
- (cons from (regexp-charset-range-1* (1+ from) to))))
-
- (defvar *regexp-charset-vector* nil)
-
- (defun regexp-expand-charset (mode charsets)
- (TREX-init *regexp-charset-vector* (make-vector 256 nil))
- (let ((i 0))
- (while (< i 256)
- (aset *regexp-charset-vector* i nil)
- (TREX-inc i)))
- (while charsets
- (cond((numberp (car charsets))
- (aset *regexp-charset-vector* (car charsets) t))
- ((stringp (car charsets))
- (if (= (length (car charsets)) 1)
- (aset *regexp-charset-vector* (aref (car charsets) 0) t)
- (let ((list (TREX-string-to-list (car charsets))))
- (aset *regexp-charset-vector* (car list)
- (regexp-expand-charset-set-mark (cdr list)
- (aref *regexp-charset-vector* (car list)))))))
- ((and (consp (car charsets))
- (eq (car (car charsets)) ':range))
- (let ((from (aref (nth 1 (car charsets)) 0))
- (to (aref (nth 2 (car charsets)) 0)))
- (if (<= from to)
- (if (< to 128)
- (let ((char from))
- (while (<= char to)
- (aset *regexp-charset-vector* char t)
- (TREX-inc char)))
- (let ((from-list (TREX-string-to-list (nth 1 (car charsets))))
- (to-list (TREX-string-to-list (nth 2 (car charsets)))))
- ;;; $B$I$&$9$s$N!*(B
- ))))))
- (setq charsets (cdr charsets)))
- (let ((result nil)
- (i 0))
- (while (< i 256)
- (if (eq (aref *regexp-charset-vector* i) mode)
- (TREX-push (list i) result))
- (TREX-inc i))
- (nreverse result)))
-
-
- (defun regexp-expand-charset-set-mark (chars alist)
- (if (null chars) t
- (let ((place (assoc (car chars) alist)))
- (cond((null place)
- (cons
- (cons (car chars)
- (regexp-expand-charset-set-mark (cdr chars) nil))
- alist))
- (t
- (setcdr place
- (regexp-expand-charset-set-mark (cdr chars) (cdr place)))
- alist)))))
-
- (defun regexp-expand-or (regexps)
- (if regexps
- (append (regexp-expand-regexp (car regexps))
- (regexp-expand-or (cdr regexps)))
- nil))
-
- (defun regexp-expand-seq (regexps)
- (if (null regexps)
- (list nil)
- (let ((result (regexp-expand-regexp (car regexps))))
- (if (TREX-memequal nil result)
- (let ((newresult (regexp-expand-seq (cdr regexps))))
- (setq result (TREX-delete nil result))
- (while result
- (TREX-push (append (car result) (cdr regexps)) newresult)
- (setq result (cdr result)))
- newresult)
- (let ((newresult nil))
- (while result
- (TREX-push (append (car result) (cdr regexps)) newresult)
- (setq result (cdr result)))
- newresult)))))
-
- (defun regexp-expand-items (items)
- (if items
- (append (regexp-expand-seq (car items))
- (regexp-expand-items (cdr items)))
- nil))
-
- ;;;
- ;;; regexp-
- ;;;
-
- (defun regexp-make-island (items)
- (let ((result (TREX-delete-duplicate (regexp-expand-items items))))
- (let ((l result))
- (while l
- (cond((null (car l))
- (setcdr l nil)
- (setq l nil))
- (t (setq l (cdr l))))))
- result))
-
- (defun regexp-make-island-parallel (items)
- (regexp-sort (TREX-delete-duplicate (regexp-expand-items items))
- 'regexp-item-lessp))
-
-
- ;;; Finate state Automaton:
- ;;;
- ;;; FA : Non-deterministic FA
- ;;; EFFA : Epsilon Free FA
- ;;; DFA : Deterministic FA
- ;;;
- ;;;
- ;;; DFA-optimize <- DFA <- EFFA <- NDFA <- regexp
-
-
- ;;;
- ;;; Table structure
- ;;; <FA> ::= ( <START> . <TransTables> )
- ;;; <TransTables> ::= ( <Node> . <TransTable> ) ...
- ;;; <TransTable> ::= ( <Key> . <Next> ) ...
- ;;; <Key> ::= <Char> | <Condition> | :epsilon
- ;;;
-
- (defvar *regexp-node-to-transtable* nil)
- (defvar *regexp-island-to-node* nil)
- (defvar *regexp-counter* 0)
-
- (defun FA-make (regexp)
- (setq *regexp-island-to-node* nil)
- (let ((*regexp-node-to-transtable* nil)
- ;;; (*regexp-island-to-node* nil)
- (*regexp-counter* 0))
- (let ((island (regexp-make-island (regexp-expand-regexp regexp))))
- (cons (FA-make-closure island) (nreverse *regexp-node-to-transtable*)))))
-
- (defun FA-make-closure (island)
- (if *regexp-debug* (princ (format "FA-make-closure %s\n" island)))
- (if (null island) nil
- (let ((place (assoc island *regexp-island-to-node*))
- (pos nil))
- (cond(place (cdr place))
- ;;; START_MEMORY and STOP_MEMORY $B!JL5>r7o!$:GM%@h$GA+0\$9$k$b$N!K(B
- ((setq pos (TREX-find-if 'regexp-item-key-class0 island))
- (let ((pre (TREX-firstn island pos))
- (item (nth pos island))
- (post (nthcdr (1+ pos) island)))
- (let* ((number (TREX-inc *regexp-counter*))
- (pair (cons (car item) nil))
- (alist (list pair))
- (place (cons number alist)))
- (TREX-push (cons island number) *regexp-island-to-node*)
- (TREX-push place *regexp-node-to-transtable*)
- (setcdr pair
- (FA-make-closure
- (regexp-make-island (append pre (list (cdr item)) post))))
- number)))
- ;;; BEGLINE, ENDLINE, WORDBEG, ....$B!JD9$5#0$N$b$N!K(B
- ;;; $BA+0\$O(B
- ;;; KEY --> TRUE+FALSE
- ;;; :epsilon --> FALSE $B$H$J$k!%(B
- ((setq pos (TREX-find-if 'regexp-item-key-class1 island))
- (let((key (car (nth pos island)))
- (items island)
- (result-true nil)
- (result-false nil))
- (while items
- (let ((item (car items)))
- (if (equal key (car item))
- (TREX-push (cdr item) result-true)
- (progn
- (TREX-push item result-true)
- (TREX-push item result-false))))
- (setq items (cdr items)))
- (setq result-true (nreverse result-true)
- result-false (nreverse result-false))
- (if (null result-false)
- (let* ((number (TREX-inc *regexp-counter*))
- (pair-true (cons key nil))
- (alist (list pair-true))
- (place (cons number alist)))
- (TREX-push (cons island number) *regexp-island-to-node*)
- (TREX-push place *regexp-node-to-transtable*)
- (setcdr pair-true (FA-make-closure (regexp-make-island result-true)))
- number)
- (let* ((number (TREX-inc *regexp-counter*))
- (pair-true (cons key nil))
- (pair-false (cons ':epsilon nil))
- (alist (list pair-true pair-false))
- (place (cons number alist)))
- (TREX-push (cons island number) *regexp-island-to-node*)
- (TREX-push place *regexp-node-to-transtable*)
- (setcdr pair-true (FA-make-closure (regexp-make-island result-true)))
- (setcdr pair-false (FA-make-closure (regexp-make-island result-false)))
- number))))
- (t
- (FA-make-closure* island (FA-make-pre-alist island)))))))
-
- ;;;
- ;;; $B$3$3$G07$&$N$O(B class2,3,4 $B$N$_(B
- ;;;
- (defun FA-make-closure* (island pre-alist)
- (if *regexp-debug* (princ (format "\nregexp-make-clousre* %s" pre-alist)))
- (let* ((number (TREX-inc *regexp-counter*))
- (place (cons number pre-alist)))
- (TREX-push (cons island number) *regexp-island-to-node*)
- (TREX-push place *regexp-node-to-transtable*)
- (while pre-alist
- (let ((pair (car pre-alist)))
- (setcdr pair
- (FA-make-closure (regexp-make-island (cdr pair)))))
- (setq pre-alist (cdr pre-alist)))
- number))
-
- ;;;
- ;;; PRE-ALIST ::= ( (key . items) ... )
- ;;;
-
- (defun FA-make-pre-alist (items)
- (let ((pre-alist nil))
- (while items
- (let ((item (car items)))
- (cond((or (regexp-key-class2 (car item))
- (regexp-key-class3 (car item)))
- (let ((key (car item))
- (newitems nil))
- (while (and items (equal key (car (car items))))
- (TREX-push (cdr (car items)) newitems)
- (setq items (cdr items)))
- (setq newitems (nreverse newitems))
- (TREX-push (cons key newitems) pre-alist)))
- ((null item)
- (TREX-push (list nil) pre-alist)
- (setq items (cdr items)))
- ((regexp-key-class4 (car item))
- (let((alist nil))
- (while (and items (regexp-key-class4 (car (car items))))
- (let* ((newitem (car items))
- (place (assoc (car newitem) alist)))
- (if place
- (setcdr place
- (cons (cdr newitem) (cdr place)))
- (TREX-push (cons (car newitem) (list (cdr newitem))) alist)))
- (setq items (cdr items)))
- (setq alist (sort alist 'TREX-lessp-car))
- (let ((list alist))
- (while list
- (setcdr (car list) (nreverse (cdr (car list))))
- (setq list (cdr list)))
- (setq pre-alist (append alist pre-alist))
- )))
- (t (error "undefined items(%s)" item)))))
- (nreverse pre-alist)))
-
- ;;;
- ;;; FA-inverse
- ;;;
-
- (defun FA-inverse (FA)
- (let ((invFA nil)
- (start (car FA))
- (table (cdr FA))
- (minnode 10000)
- (maxnode 0)
- (newtable nil)
- (newstart nil)
- (newfinal nil))
- (let ((l table))
- (while l
- (let ((n (car (car l))))
- (if (< n minnode) (setq minnode n))
- (if (< maxnode n) (setq maxnode n)))
- (setq l (cdr l))))
- (setq newstart (1- minnode))
- (setq newfinal (1+ maxnode))
- (setq newtable (FA-link newfinal nil nil newtable))
- (while table
- (let* ((Snode (car table))
- (Snumber (car Snode))
- (Salist (cdr Snode)))
- (while Salist
- (let* ((pair (car Salist))
- (key (car pair))
- (Tnumber (cdr pair)))
- (cond((null key)
- (setq newtable (FA-link newstart ':epsilon Snumber newtable)))
- (t
- (setq newtable (FA-link Tnumber key Snumber newtable))))
- (setq Salist (cdr Salist)))))
- (setq table (cdr table)))
- (setq newtable (FA-link start ':epsilon newfinal newtable))
- ;;;; FA $B$N(B final $B$X(B invFA $B$N(B start $B$+$i(B :epsilon link $B$rD%$k!%(B
- (let ((l newtable))
- (while l
- (setcdr (car l) (reverse (cdr(car l))))
- (setq l (cdr l))))
- (setq newtable (sort newtable 'TREX-lessp-car))
- (cons newstart newtable)))
-
- (defun FA-link (from key to table)
- (let ((place (assoc from table)))
- (cond ((null place )
- (setq place (cons from nil))
- (TREX-push place table)))
- (setcdr place (cons (cons key to) (cdr place)))
- table))
-
- ;;;
- ;;; FA-dump
- ;;;
-
- (defun FA-dump (table)
- (let ((start (car table))
- (l (cdr table)))
- (princ (format "\nstart = %d\n" start))
- (while l
- (princ (format "%3d: " (car (car l))))
- (let ((alist (cdr (car l))))
- (cond ((numberp (car (car alist)))
- (princ (format "%c -> %s\n" (car (car alist)) (cdr (car alist)))))
- ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
- (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
- (t
- (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist))))))
- (setq alist (cdr alist))
- (while alist
- (cond ((numberp (car (car alist)))
- (princ (format " %c -> %s\n" (car (car alist)) (cdr (car alist)))))
- ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
- (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
- (t
- (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist))))))
- (setq alist (cdr alist))))
- (setq l (cdr l)))))
-
- ;;;
- ;;; EFFA: Epsilon Free Finate Automaton
- ;;;
-
- (defvar *FA-table* nil)
- (defvar *EFFA-table* nil)
-
- (defun EFFA-make (FA)
- (let* ((start (car FA))
- (*FA-table* (cdr FA))
- (newstart start)
- (*EFFA-table* nil))
- (cons newstart (reverse (EFFA-make* start)))))
-
- (defun EFFA-make* (node)
- (let ((place (assoc node *EFFA-table*)))
- (cond((null place)
- (let ((place (cons node nil)))
- (TREX-push place *EFFA-table*)
- (setcdr place
- (reverse (EFFA-make-alist nil (cdr (assoc node *FA-table*))
- (list node))))
- (let ((alist (cdr place)))
- (while alist
- (cond((car (car alist))
- (EFFA-make* (cdr (car alist)))))
- (setq alist (cdr alist))))))))
- *EFFA-table*)
-
- (defun EFFA-make-alist (newalist alist set)
- (while alist
- (let ((node (cdr (car alist))))
- (cond((eq (car (car alist)) ':epsilon)
- (cond((not (TREX-memequal node set))
- (TREX-push node set)
- (setq newalist
- (EFFA-make-alist newalist (cdr (assoc node *FA-table*)) set)))))
- (t
- (TREX-push (car alist) newalist))))
- (setq alist (cdr alist)))
- newalist)
-
- ;;;
- ;;; DFA: Deterministic Finate Automata
- ;;;
-
- (defvar *DFA-node-counter* nil)
-
- (defvar *DFA-node-definitions* nil
- "List of FD-nodes to node number")
-
- (defvar *DFA-table* nil
- "node number to alist")
-
- (defun DFA-make (EFFA)
- (let ((start (car EFFA))
- (*EFFA-table* (cdr EFFA))
- (*DFA-node-counter* 0)
- (*DFA-node-definitions* nil )
- (*DFA-table* nil))
- (DFA-make-1 (list start))
- (cons (cdr (assoc (list start) *DFA-node-definitions*)) *DFA-table*)))
-
- (defun DFA-make-1 (states)
- (let ((place (assoc states *DFA-node-definitions*)))
- (cond((null place)
- (TREX-inc *DFA-node-counter*)
- (setq place (cons states *DFA-node-counter*))
- (TREX-push place *DFA-node-definitions*)
- (let ((pair (cons *DFA-node-counter* nil)))
- (TREX-push pair *DFA-table*)
- (setcdr pair (DFA-make-pre-alist (DFA-collect-alist states)))
- (let ((alist (cdr pair)))
- (while alist
- (let ((top (car alist)))
- (if (car top)
- (setcdr top
- (DFA-make-1 (cdr top)))))
- (setq alist (cdr alist))))
- )))
- (cdr place)))
-
- (defun DFA-collect-alist (states)
- (let ((result nil))
- (while states
- (setq result (append (cdr (assoc (car states) *EFFA-table*)) result))
- (setq states (cdr states)))
- result))
-
- (defun DFA-make-pre-alist (oldAlist)
- (let ((pre-alist nil))
- (while oldAlist
- (let ((oldKey (car (car oldAlist))))
- (cond((or (regexp-key-class0 oldKey)
- (regexp-key-class1 oldKey)
- (regexp-key-class2 oldKey)
- (regexp-key-class3 oldKey))
- (let ((key oldKey)
- (newAlist nil))
- (while (and oldAlist (equal key (car (car oldAlist))))
- (TREX-push (cdr (car oldAlist)) newAlist)
- (setq oldAlist (cdr oldAlist)))
- (setq newAlist (nreverse newAlist))
- (TREX-push (cons key newAlist) pre-alist)))
- ((regexp-key-class4 oldKey)
- (let((alist nil))
- (while (and oldAlist (regexp-key-class4 (car (car oldAlist))))
- (let ((place (assoc (car (car oldAlist)) alist)))
- (if place
- (setcdr place
- (cons (cdr (car oldAlist)) (cdr place)))
- (TREX-push (cons (car (car oldAlist)) (list(cdr (car oldAlist)))) alist)))
- (setq oldAlist (cdr oldAlist)))
- (setq alist (sort alist 'TREX-lessp-car))
- (let ((list alist))
- (while list
- (setcdr (car list) (reverse (cdr (car list))))
- (setq list (cdr list)))
- (setq pre-alist (append alist pre-alist))
- )))
- ((null oldKey)
- (TREX-push (list nil) pre-alist)
- (setq oldAlist (cdr oldAlist)))
- (t
- (setq oldAlist (cdr oldAlist))))))
- (nreverse pre-alist)))
-
- ;;;
- ;;; DFA-optimize
- ;;; $B$3$3$G$N:GE,2=$O>H9g=g=x$rJ]B8$9$k!%(B
- ;;; longer match $B$J$I$r$9$k>l9g$OJQ99$9$kI,MW$,$"$k!%(B
-
- (defvar *DFA-optimize-debug* nil)
-
- (defvar *DFA-optimize-groups* nil)
- (defvar *DFA-optimize-node* 1)
-
- (defun DFA-optimize (FA)
- (if *DFA-optimize-debug* (terpri))
- (let* ((start (car FA))
- (table (cdr FA))
- (*DFA-optimize-node* 1)
- (*DFA-optimize-groups*
- (list (cons *DFA-optimize-node* (mapcar 'car table)))))
- (while
- (catch 'DFA-optimize-changed
- (let ((groups *DFA-optimize-groups*))
- (while groups
- (if *DFA-optimize-debug*
- (princ (format "\nGroups to be checked: %s\n" groups)))
- (let* ((Sgroup (car groups))
- (Sgroup-number (car Sgroup))
- (oldgroup (cdr Sgroup))
- (newgroup nil)
- (Smembers oldgroup))
- (if *DFA-optimize-debug*
- (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers)))
- (while Smembers
- (let* ((Snumber (car Smembers))
- (Salist (cdr (assoc Snumber table))))
- (if *DFA-optimize-debug*
- (princ (format " Snumber: %s\n" Snumber)))
- (let ((Tmembers (cdr Smembers)))
- (while Tmembers
- (if (not (eq Snumber (car Tmembers)))
- (let* ((Tnumber (car Tmembers))
- (Talist (cdr (assoc Tnumber table)))
- (Salist Salist))
- (if *DFA-optimize-debug*
- (princ (format " Tnumber: %s\n" Tnumber)))
- (while (and Talist Salist
- (equal (car (car Talist))
- (car (car Salist))) ;;; key
- (equal (DFA-optimize-group-number
- (cdr (car Talist)))
- (DFA-optimize-group-number
- (cdr (car Salist))) ;;; next group
- ))
- (if *DFA-optimize-debug*
- (progn
- (princ (format " Skey: %s -> %s(%s)\n"
- (car (car Salist))
- (cdr (car Salist))
- (DFA-optimize-group-number (cdr (car Salist)))))
- (princ (format " Tkey: %s -> %s(%s)\n"
- (car (car Talist))
- (cdr (car Talist))
- (DFA-optimize-group-number (cdr (car Talist)))))))
- (setq Talist (cdr Talist)
- Salist (cdr Salist)))
- (cond((or Talist Salist)
- (setq newgroup (cons Tnumber newgroup)
- oldgroup (TREX-delete Tnumber oldgroup))
- (if *DFA-optimize-debug*
- (princ(format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup)))))
- ))
- (setq Tmembers (cdr Tmembers)))))
- (cond (newgroup
- (if *DFA-optimize-debug*
- (princ (format "Changed :%s --> " Sgroup)))
- (setcdr Sgroup oldgroup)
- (if *DFA-optimize-debug*
- (princ (format "%s" Sgroup)))
- (TREX-inc *DFA-optimize-node*)
- (if *DFA-optimize-debug*
- (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup))))
- (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*)
- (throw 'DFA-optimize-changed t)))
- (setq Smembers (cdr Smembers))))
- (setq groups (cdr groups))))))
- ;;;
- ;;;
- (if *DFA-optimize-debug*
- (princ (format "table: %s\n" table)))
- (if *DFA-optimize-debug*
- (princ (format "groups: %s\n" *DFA-optimize-groups*)))
- (let ((newtable nil)
- (newstart nil)
- (groups *DFA-optimize-groups*))
-
- ;;; start node $B$rC5$9(B
- (let ((l *DFA-optimize-groups*))
- (while l
- (cond((TREX-memequal start (cdr (car l)))
- (setq newstart (car (car l)))
- (setq l nil))
- (t
- (setq l (cdr l))))))
-
- ;;; $B?7$7$$(B transTable $B$r:n$k!%(B
- (while groups
- (let* ((group (car groups))
- (group-number (car group))
- (member-number (car (cdr group)))
- (member-alist (cdr (assoc member-number table))))
- (TREX-push (cons group-number
- (let ((group-alist nil))
- (while member-alist
- (let ((Mkey (car (car member-alist)))
- (Mnext (cdr (car member-alist))))
- (TREX-push (cons Mkey (DFA-optimize-group-number Mnext))
- group-alist))
- (setq member-alist (cdr member-alist)))
- (nreverse group-alist)))
- newtable)
- (setq groups (cdr groups))))
- (cons newstart newtable))))
-
- (defun DFA-optimize-group-number (node)
- (let ((l *DFA-optimize-groups*) (result nil))
- (while l
- (cond((TREX-memequal node (cdr (car l)))
- (setq result (car (car l))
- l nil))
- (t (setq l (cdr l)))))
- result))
-
- (defun DFA-optimize-parallel (FA)
- (if *DFA-optimize-debug* (terpri))
- (let* ((start (car FA))
- (table (cdr FA))
- (*DFA-optimize-node* 1)
- (*DFA-optimize-groups*
- (list (cons *DFA-optimize-node* (mapcar 'car table)))))
- (while
- (catch 'DFA-optimize-changed
- (let ((groups *DFA-optimize-groups*))
- (while groups
- (if *DFA-optimize-debug*
- (princ (format "\nGroups to be checked: %s\n" groups)))
- (let* ((Sgroup (car groups))
- (Sgroup-number (car Sgroup))
- (oldgroup (cdr Sgroup))
- (newgroup nil)
- (Smembers oldgroup))
- (if *DFA-optimize-debug*
- (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers)))
- (while Smembers
- (let* ((Snumber (car Smembers))
- (Salist (cdr (assoc Snumber table))))
- (if *DFA-optimize-debug*
- (princ (format " Snumber: %s\n" Snumber)))
- (while Salist
- (let* ((Spair (car Salist))
- (Skey (car Spair))
- (Snext (cdr Spair))
- (Snext-group (DFA-optimize-group-number Snext))
- (Tmembers oldgroup))
- (if *DFA-optimize-debug*
- (princ (format " Skey: %s -> %s(%s)\n" Skey Snext-group Snext)))
- (while Tmembers
- (if (not (eq Snumber (car Tmembers)))
- (let* ((Tnumber (car Tmembers))
- ;;; $BMW:F8!F$(B
- (Tpair (assoc Skey (cdr (assoc Tnumber table))))
- (Tnext (cdr Tpair))
- (Tnext-group (DFA-optimize-group-number (cdr Tpair))))
- (if *DFA-optimize-debug*
- (princ (format " Tnumber: %s : %s -> %s(%s)\n" Tnumber (car Tpair)
- (DFA-optimize-group-number (cdr Tpair))(cdr Tpair))))
- (cond((and (equal Spair '(nil))
- (equal Tpair '(nil))))
- ((and Skey (equal Snext-group Tnext-group)))
- (t
- (TREX-push Tnumber newgroup)
- (setq oldgroup (TREX-delete Tnumber oldgroup))
- (if *DFA-optimize-debug*
- (princ(format (format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup))))
- ))))
- (setq Tmembers (cdr Tmembers)))
- (cond (newgroup
- (if *DFA-optimize-debug*
- (princ (format "Changed :%s --> " Sgroup)))
- (setcdr Sgroup oldgroup)
- (if *DFA-optimize-debug*
- (princ (format "%s" Sgroup)))
- (TREX-inc *DFA-optimize-node*)
- (if *DFA-optimize-debug*
- (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup))))
- (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*)
- (throw 'DFA-optimize-changed t))))
- (setq Salist (cdr Salist))))
- (setq Smembers (cdr Smembers))))
- (setq groups (cdr groups))))))
- ;;;
- ;;;
- (if *DFA-optimize-debug*
- (princ (format "table: %s\n" table)))
- (if *DFA-optimize-debug*
- (princ (format "groups: %s\n" *DFA-optimize-groups*)))
- (let ((newtable nil)
- (newstart nil)
- (groups *DFA-optimize-groups*))
-
- ;;; start node $B$rC5$9(B
- (let ((l *DFA-optimize-groups*))
- (while l
- (cond((TREX-memequal start (cdr (car l)))
- (setq newstart (car (car l)))
- (setq l nil))
- (t
- (setq l (cdr l))))))
-
- ;;; $B?7$7$$(B transTable $B$r:n$k!%(B
- (while groups
- (let* ((group (car groups))
- (group-number (car group))
- (member-number (car (cdr group)))
- (member-alist (cdr (assoc member-number table))))
- (TREX-push (cons group-number
- (let ((group-alist nil))
- (while member-alist
- (let ((Mkey (car (car member-alist)))
- (Mnext (cdr (car member-alist))))
- (TREX-push (cons Mkey
- (if (consp Mnext)
- (cons (DFA-optimize-group-number (car Mnext))
- (DFA-optimize-group-number (cdr Mnext)))
- (DFA-optimize-group-number Mnext)))
- group-alist))
- (setq member-alist (cdr member-alist)))
- group-alist))
- newtable)
- (setq groups (cdr groups))))
- (cons newstart newtable))))
-
-
-
- ;;;
- ;;; Non Empty Finite Automata
- ;;;
-
- (defun NEFA-make (EFFA)
- (let* ((start (car EFFA))
- (table (cdr EFFA))
- (Salist (cdr (assoc start table))))
- (cond((equal Salist '((nil)))
- nil)
- ((and (assoc nil Salist)
- (progn
- (while (and Salist (not (equal start (cdr (car Salist)))))
- (setq Salist (cdr Salist)))
- Salist))
- (let ((min 10000)
- (max -10000)
- (l table))
- (while l
- (if (< (car (car l)) min)
- (setq min (car (car l))))
- (if (< max (car (car l)))
- (setq max (car (car l))))
- (setq l (cdr l)))
- (let* ((newstart (1- min))
- (newtable (copy-alist table))
- (oldSalist (cdr (assoc start table)))
- (newSalist (TREX-delete '(nil) (copy-alist oldSalist))))
- (cons newstart
- (cons (cons newstart newSalist) newtable)))))
- (t
- EFFA))))
-
- ;;;
- ;;; Simplify FA
- ;;;
-
- (defvar *FA-simplify-table* nil)
-
- (defun FA-simplify (FA)
- (let ((start (car FA))
- (table (cdr FA))
- (newtable nil)
- (*FA-simplify-table* nil))
- (FA-simplify-mark start table)
- (while *FA-simplify-table*
- (TREX-push (assoc (car *FA-simplify-table*) table) newtable)
- (setq *FA-simplify-table* (cdr *FA-simplify-table*)))
- (cons start newtable)))
-
- (defun FA-simplify-mark (node table)
- (cond ((not (TREX-memequal node *FA-simplify-table*))
- (TREX-push node *FA-simplify-table*)
- (let ((alist (cdr (assoc node table))))
- (while alist
- (cond((car (car alist))
- (FA-simplify-mark (cdr (car alist)) table)))
- (setq alist (cdr alist)))))))
-
- ;;;
- ;;; Shortest match DFA
- ;;;
-
- (defun DFA-shortest-match (DFA)
- (let ((start (car DFA))
- (table (cdr DFA))
- (newtable nil))
- (while table
- (cond ((assoc nil (cdr (car table)))
- (TREX-push (cons (car (car table)) '((nil))) newtable))
- (t
- (TREX-push (car table) newtable)))
- (setq table (cdr table)))
- (cons start newtable)))
-
- ;;;
- ;;; Fastmap computation
- ;;;
-
- (defvar *DFA-fastmap-chars* nil)
- (defvar *DFA-fastmap-syntax* nil)
- (defvar *DFA-fastmap-category* nil)
- (defvar *DFA-fastmap-init* 0 )
- (defvar *DFA-fastmap-pos* 1 ) ;;; SYNTAXSPEC or CATEGORYSPEC
- (defvar *DFA-fastmap-neg* 2 ) ;;; NOTSYNTAXSPEC or NOTCATEGORYSPEC
-
- ;;;; $B$9$Y$F$N(B char $B$OB~0l$D$N(B syntaxspec $B$KB0$9$k(B
- ;;;; ==> syntaxspec(ch) and notsyntaxspec(ch) --> all char
- ;;;; ==> notsyntaxspec(ch1) and notsyntaxspec(ch2) --> all char
- ;;;; ==> notsyntaxspec(ch1) and syntaxspec(ch2) == notsyntaxspec(ch1)
- ;;;; $B$D$^$j(B notsyntaxspec $B$O9b!9#1$D$7$+$J$$!%(B
-
- ;;; Returns [ CODE FASTMAP SYNTAX-FASTMAP CATEGOY-FASTMAP ]
-
- (defun DFA-code-with-fastmap (DFA)
- (TREX-init *DFA-fastmap-chars* (make-vector 256 nil))
- (TREX-init *DFA-fastmap-syntax* (make-vector 256 nil))
- (TREX-init *DFA-fastmap-category* (make-vector 256 nil))
- (let ((code (regexp-code-gen DFA))
- (start (car DFA))
- (*DFA-fastmap-table* (cdr DFA))
- (*DFA-fastmap-mark* nil)
- (*DFA-fastmap-special* nil))
- (let ((i 0))
- (while (< i 256)
- (aset *DFA-fastmap-chars* i nil)
- (aset *DFA-fastmap-syntax* i nil)
- (aset *DFA-fastmap-category* i nil)
- (TREX-inc i)))
- (DFA-fastmap-collect start)
- (let ((fastmap (if *DFA-fastmap-special*
- nil ;;;(make-string 256 1)
- (make-string 256 0)))
- (fastmap-entries 0)
- (syntax (if *DFA-fastmap-special*
- nil
- (make-string 256 0)))
- (syntax-entries 0)
- (notsyntax-entries 0)
- (category (if *DFA-fastmap-special*
- nil
- (make-string 256 0)))
- (category-entries 0))
- (let ((result (make-vector 4 nil)))
- (aset result 0 code)
- (if *DFA-fastmap-special*
- (progn
- (aset result 1 fastmap)
- (aset result 2 syntax)
- (aset result 3 category))
- (progn
- (let ((i 0))
- (while (< i 256)
- (if (aref *DFA-fastmap-chars* i)
- (progn
- (TREX-inc fastmap-entries)
- (aset fastmap i 1)))
- (aset syntax i
- (cond((null (aref *DFA-fastmap-syntax* i))
- *DFA-fastmap-init*)
- ((eq (aref *DFA-fastmap-syntax* i) 'SYNTAXSPEC)
- (TREX-inc syntax-entries)
- *DFA-fastmap-pos*)
- ((eq (aref *DFA-fastmap-syntax* i) 'NOTSYNTAXSPEC)
- (TREX-inc notsyntax-entries)
- (TREX-inc syntax-entries)
- *DFA-fastmap-neg*)))
- (aset category i
- (cond((null (aref *DFA-fastmap-category* i))
- *DFA-fastmap-init*)
- ((eq (aref *DFA-fastmap-category* i) 'CATEGORYSPEC)
- (TREX-inc category-entries)
- *DFA-fastmap-pos*)
- ((eq (aref *DFA-fastmap-category* i) 'NOTCATEGORYSPEC)
- (TREX-inc category-entries)
- *DFA-fastmap-neg*)))
- (TREX-inc i)))
-
- (cond((<= 2 notsyntax-entries)
- (setq fastmap (make-string 256 1)
- syntax nil
- category nil))
- ((= 1 notsyntax-entries)
- (let ((ch 0))
- (while (< ch 256)
- (if (= (aref syntax ch) *DFA-fastmap-neg*)
- (aset syntax ch *DFA-fastmap-init*)
- (aset syntax ch *DFA-fastmap-pos*))
- (TREX-inc ch)))))
- (aset result 1 fastmap)
- (aset result 2 syntax)
- (aset result 3 category)))
- result))))
-
- (defun DFA-fastmap-collect (node)
- (if (TREX-memequal node *DFA-fastmap-mark*) nil
- (let ((alist (cdr (assoc node *DFA-fastmap-table*))))
- (TREX-push node *DFA-fastmap-mark*)
- (while alist
- (let ((key (car (car alist))))
- (cond((numberp key)
- (aset *DFA-fastmap-chars* key t))
- ((symbolp key);;; can be null
- (setq *DFA-fastmap-special* t))
- (t
- (let ((op (car key)))
- (cond
- ((TREX-memequal op '(START_MEMORY STOP_MEMORY))
- (DFA-fastmap-collect (cdr (car alist))))
- ((TREX-memequal op '(SYNTAXSPEC NOTSYNTAXSPEC))
- (let ((specch (syntax-code-spec (nth 1 key))))
- (cond((null (aref *DFA-fastmap-syntax* (nth 1 key)))
- (aset *DFA-fastmap-syntax* specch op))
- ((not (eq (aref *DFA-fastmap-syntax* specch) op))
- (setq *DFA-fastmap-special* t)))))
- ((TREX-memequal op '(CATEGORYSPEC NOTCATEGORYSPEC))
- (let ((specch (nth 1 key)))
- (cond((null (aref *DFA-fastmap-category* specch))
- (aset *DFA-fastmap-category* specch op))
- ((not (eq (aref *DFA-fastmap-category* specch) op))
- (setq *DFA-fastmap-special* t)))))
- ((TREX-memequal op '(CHARSET CHARSET_NOT))
- (let ((list (cdr key)))
- (while list
- (let ((from nil) (to nil))
- (cond((stringp (car list))
- (setq from (aref (car list) 0)
- to (aref (car list) 0)))
- (t ;;; :range
- (setq from (aref (nth 1 (car list)) 0)
- to (aref (nth 2 (car list)) 0))))
- (while (<= from to)
- (cond((null (aref *DFA-fastmap-chars* from))
- (aset *DFA-fastmap-chars* from
- (if (eq op 'CHARSET_NOT) 'CHARSET_NOT
- t))))
- (TREX-inc from)))
- (setq list (cdr list))))
- (if (eq op 'CHARSET_NOT)
- (let ((i 0))
- (while (< i 256)
- (cond((null (aref *DFA-fastmap-chars* i))
- (aset *DFA-fastmap-chars* i t))
- ((eq (aref *DFA-fastmap-chars* i) 'CHARSET_NOT)
- (aset *DFA-fastmap-chars* i nil)))
- (TREX-inc i)))))
- (t
- (setq *DFA-fastmap-special* t)))))))
- (setq alist (cdr alist))))))
-
- ;;;
- ;;; $B@55,I=8=%3!<%I$NL?NaI=(B
- ;;;
-
- (if (= regexp-version 19)
- (TREX-define-enum
- UNUSED ;;; 18
- EXACTN ;;; 18
- ANYCHAR ;;; 18
- CHARSET ;;; 18
- CHARSET_NOT ;;; 18
- START_MEMORY ;;; 18*
- STOP_MEMORY ;;; 18*
- DUPLICATE ;;; 18
- BEGLINE ;;; 18
- ENDLINE ;;; 18
- BEGBUF ;;; 18
- ENDBUF ;;; 18
- JUMP ;;; 18
- JUMP_PAST_ALT ;;; 19
- ON_FAILURE_JUMP ;;; 18
- ON_FAILURE_KEEP_STRING_JUMP ;;; 19
- ;;;; finalize_jump
- ;;;; maybe_finalize_jump
- POP_FAILURE_JUMP ;;; 19
- MAYBE_POP_JUMP ;;; 19
- DUMMY_FAILURE_JUMP ;;; 18
- PUSH_DUMMY_FAILURE ;;; 19
- SUCCEED_N ;;; 19
- JUMP_N ;;; 19
- SET_NUMBER_AT ;;; 19
- WORDCHAR ;;; 18
- NOTWORDCHAR ;;; 18
- WORDBEG ;;; 18
- WORDEND ;;; 18
- WORDBOUND ;;; 18
- NOTWORDBOUND ;;; 18
- BEFORE_DOT ;;; 18
- AT_DOT ;;; 18
- AFTER_DOT ;;; 18
- SYNTAXSPEC ;;; 18
- NOTSYNTAXSPEC ;;; 18
- ;;; TREX code
- EXACT1
- EXACT2
- EXACT3
- CHARSET_M
- CHARSET_M_NOT
- CASEN
- SUCCESS_SHORT
- SUCCESS
- POP
- EXCEPT0
- EXCEPT1
- CATEGORYSPEC
- NOTCATEGORYSPEC
- RANGE
- RANGE_A
- )
- ;; else regexp-version == 18.
- (TREX-define-enum
- UNUSED
- EXACTN
- BEGLINE
- ENDLINE
- JUMP
- ON_FAILURE_JUMP
- FINALIZE_JUMP
- MAYBE_FINALIZE_JUMP
- DUMMY_FAILURE_JUMP
- ANYCHAR
- CHARSET
- CHARSET_NOT
- START_MEMORY
- STOP_MEMORY
- DUPLICATE
- BEFORE_DOT ;;; not used
- AT_DOT ;;; not used
- AFTER_DOT ;;; not used
- BEGBUF
- ENDBUF
- WORDCHAR ;;; not used
- NOTWORDCHAR ;;; not used
- WORDBEG
- WORDEND
- WORDBOUND
- NOTWORDBOUND
- SYNTAXSPEC
- NOTSYNTAXSPEC
- ;;;
- ;;; extended instructions
- ;;;
- EXACT1
- EXACT2
- EXACT3
- CHARSET_M
- CHARSET_M_NOT
- CASEN
- SUCCESS_SHORT ;;; == ON_FAILURE_SUCCESS
- SUCCESS
- POP
- EXCEPT0 ;;; ALLCHAR
- EXCEPT1
- CATEGORYSPEC
- NOTCATEGORYSPEC
- ))
-
- (defvar ON_FAILURE_SUCCESS SUCCESS_SHORT)
-
- ;;;
- ;;; ANYCHAR = EXCEPT1 \n
- ;;; ALLCHAR = EXCEPT0
-
-
- ;;;
- ;;; $B@55,I=8=>H9g4o$NL?NaBN7O(B
- ;;;
- ;;; UNUSED
- ;;; EXACTN n ch1 ch2 ... chn
- ;;; BEGLINE
- ;;; ENDLINE
- ;;; JUMP disp[2]
- ;;; +JUMP_PAST_ALT disp[2]
- ;;; ON_FAILURE_JUMP disp[2]
- ;;; +ON_FAILURE_KEEP_STRING_JUMP disp[2]
- ;;; -FINALIZE_JUMP disp[2]
- ;;; -MAYBE_FINALIZE_JUMP disp[2]
- ;;; +POP_FAILURE_JUMP disp[2]
- ;;; +MAYBE_POP_JUMP disp[2]
- ;;; DUMMY_FAILURE_JUMP disp[2]
- ;;; +PUSH_DUMMY_FAILURE
- ;;; +SUCCEED_N disp[2] n[2]
- ;;; +JUMP_N disp[2] n[2]
- ;;; +SET_NUMBER_AT disp[2] n[2]
- ;;; ANYCHAR
- ;;; CHARSET n b1 b2 ... bn
- ;;;**CHARSET 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn
- ;;; CHARSET_NOT n b1 b2 ... bn
- ;;;**CHARSET_NOT 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn
- ;;; $B0J2<$O$($J$_;a$NDs0F$K$h$k?7$?$J%;%^%s%F%#%C%/%9(B
- ;;
- ;;; CHARSET n b1 b2 ... bn (n < 0x80)
- ;;; CHARSET n+0x80 b1 b2 ... bn
- ;;; |<-- n bytes -->|
- ;;; lh lo CHARF1 CHART1 .... CHARFm CHARTm
- ;;; |<- lh << 8 + lo bytes ->|
- ;; CHARSET n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
- ;; |<- bitmap ->| |<- range table ->|
- ;; CHARSET_NOT n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
- ;; CHARSETM m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
- ;; CHARSETM_NOT m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
- ;;
- ;; o cfx, ctx $B0J30$O$9$Y$F(B 1byte. cfx, ctx $B$O(B multi byte
- ;; character.
- ;;
- ;; o CHARSET(_NOT) $B$H(B CHARSETM(_NOT) $B$H$N0c$$$O(B, CHARSETM(_NOT)
- ;; $B$N>l9g$K$O(B bitmap $B$N@hF,$N(B m bytes $B$,>J$+$l$F$$$kE@(B.
- ;;
- ;; o b1 ... bn ($B$D$^$j(B bitmap$B$ND9$5(B)$B$O(B, (n & 0x7f) bytes. n $B$N(B
- ;; $BJ,(B 1byte $B$O4^$^$J$$(B.
- ;;
- ;; o lh $B0J2<$O(B n & 0x80 $B$,(B 0 $B$J$iB8:_$7$J$$(B.
- ;;
- ;; o lh $B$+$i(B ctn $B$^$G$ND9$5(B($B$D$^$j(B range table $B$ND9$5(B) $B$O(B ((lh
- ;; << 8) + lo) byte. lh $B$H(B lo $B$N(B 2byte $B$r4^$`(B. ($B>e$N(B n $B$N>l(B
- ;; $B9g$H0c$$$^$9$,(B, $BE}0l$7$?$[$&$,$$$$$+$J(B?).
- ;;
- ;; o cfx $B$O(B multi byte character $B$G(B, cfx $B$H(B ctx $B$N(B leading char
- ;; $B$OF1$8$G$J$$$H$$$1$J$$(B. $B$^$?(B, cfx $B$N(B leading char $B$O(B 0 $B$G(B
- ;; $B$"$C$F$O$$$1$J$$(B(range table $B$K(B leading char $B$,(B 0 (ASCII$B$H(B
- ;; $B$+(B) $B$NJ8;z$,$"$C$F$b(B, $B8=:_$O(B fastmap $B$KH?1G$5$l$J$$$+$i(B).
- ;;
- ;;; START_MEMORY regno
- ;;; STOP_MEMORY regno
- ;;; o emacs 19 $B$N(B regex.c $B$G$O(B,
- ;;; START_MEMORY regno groupno
- ;;; STOP_MEMORY regno groupno
- ;;; groupno $B$O<+J,$h$j2<$N%l%Y%k$N%0%k!<%W$N?t(B
- ;;;
- ;;; DUPLICATE regno
- ;;; BEFORE_DOT ;;; not used
- ;;; AT_DOT ;;; not used
- ;;; AFTER_DOT ;;; not used
- ;;; BEGBUF
- ;;; ENDBUF
- ;;; WORDCHAR ;;; not used
- ;;; NOTWORDCHAR ;;; not used
- ;;; WORDBEG
- ;;; WORDEND
- ;;; WORDBOUND
- ;;; NOTWORDBOUND
- ;;; SYNTAXSPEC ch
- ;;; NOTSYNTAXSPEC ch
-
- ;;;
- ;;; $B3HD%L?Na!J(BTREX$B$G;HMQ$9$k$b$N!K(B
- ;;;
- ;;; EXACT1 ch
- ;;; EXACT2 ch1 ch2
- ;;; EXACT3 ch1 ch2 ch3
- ;;; CHARSETM m n b1 b2 .. bn
- ;;; charset $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B
- ;;; CHARSETM_NOT m n b1 b2 .. bn
- ;;; charset_not $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B
- ;;; CASEN n disp[1] disp[2] ... disp[n] l u ind[l] ... ind[u]
- ;;; $B:G=i$K(B n $B8D$N(B jump relative address(2bytes) $B$,B3$-!$(B
- ;;; $B<!$K(Bcharacter code l $B$+$i(B m $B$^$G$NJ,$N(Bindex(1byte)$B$,B3$/!%(B
- ;;; ON_FAILURE_SUCCESS
- ;;; alternative stack $B$r6u$K$7!$(Bpend $B$r(B push $B$9$k!%(B
- ;;; SUCCESS
- ;;; pend $B$X%8%c%s%W$9$k!%(B
- ;;; POP
- ;;; alternative stack $B$r(B pop $B$9$k!%(B
-
- ;;; RANGE ch1 ch2
- ;;; RANGE_A == RANGE 0xA0 0xFF
-
-
- ;;; [^$B&A(B]$B&B(B\|$B&C(B $B$N0UL#!'(B
- ;;; on_failure_jump L1
- ;;; on_failure_jump L2
- ;;; $B&A(B
- ;;; pop
- ;;; fail
- ;;; L1: ALLCHAR
- ;;; $B&B(B
- ;;; L2: pop
- ;;; $B&C(B
-
- ;;;
- ;;; regexp-code-*
- ;;;
-
- (defvar *regexp-code-buffer* (get-buffer-create " *regexp-code-buffer*"))
-
- (defun regexp-code-gen (FA)
- (let ((start (car FA))
- (table (cdr FA))
- (*table* (cdr FA))
- (*labels* nil)
- (*final* nil)
- (*counter* 0))
- (let ((list table))
- (while (and list (null *final*))
- (if (equal '((nil)) (cdr (car list)))
- (setq *final* (car (car list))))
- (setq list (cdr list))))
- (cond((null *final*)
- (setq *final* (1+ (length table)))
- (setq *counter* (1+ *final*)))
- (t
- (setq *counter* (1+ (length table)))))
- (save-excursion
- (set-buffer *regexp-code-buffer*)
- (let ((kanji-flag nil)
- (mc-flag nil))
- (erase-buffer)
- (regexp-code-gen* start)
- (buffer-substring (point-min) (point-max)))
- )))
-
- (defun regexp-code-gen* (node)
- (cond((= node *final*)
- (if (null (assoc node *labels*))
- (TREX-push (cons node (point)) *labels*))
- (insert SUCCESS))
- ((null (assoc node *labels*))
- (TREX-push (cons node (point)) *labels*)
- (let ((alist (cdr (assoc node *table*))))
- (cond((equal '((nil)) alist)
- (insert SUCCESS))
- (t (regexp-code-gen-alist alist)))))
- (t
- (let ((disp (- (cdr (assoc node *labels*)) (+ (point) 3))))
- (insert JUMP
- (logand disp 255)
- (/ (logand disp (* 255 256)) 256))))))
-
- (defvar *regexp-charset-table* nil)
- (defvar *regexp-case-table* nil)
-
- (defun regexp-code-gen-alist (alist)
- (TREX-init *regexp-charset-table* (make-vector 256 nil))
- (TREX-init *regexp-case-table* (make-vector 256 nil))
- (if (eq (car (car alist)) nil)
- nil
- (let ((nextalist alist)
- (numberkey nil)
- (point nil)
- (min 256) (max -1) (nexts nil) (nodealist nil))
- (cond((numberp (car (car alist)))
- (setq numberkey t)
- (let ((i 0))
- (while (< i 256)
- (aset *regexp-case-table* i nil)
- (TREX-inc i)))
-
- (while (and nextalist
- (numberp (car (car nextalist))))
- (let ((ch (car (car nextalist)))
- (next (cdr (car nextalist))))
- (let ((place (assoc next nodealist)))
- (if place
- (setcdr place
- (cons ch (cdr place)))
- (TREX-push (cons ch (list next)) nodealist)))
- (aset *regexp-case-table* ch next)
- (if (< ch min) (setq min ch))
- (if (< max ch) (setq max ch))
- (if (not (TREX-memequal next nexts))
- (TREX-push next nexts)))
- (setq nextalist (cdr nextalist))))
- (t (setq nextalist (cdr alist))))
-
- (if nextalist
- (cond((eq (car (car nextalist)) nil)
- (insert ON_FAILURE_SUCCESS )) ;;; SUCCESS_SHORT
- (t
- (insert ON_FAILURE_JUMP 0 0)
- (setq point (point)))))
-
- (cond(numberkey
- (cond((= min max)
- ;;; exact1
- (regexp-code-gen-exact (list min) (car nexts)))
-
- ((= (length nexts) 1)
- ;;; charset or charset_not
- (if (= (length alist) 256)
- (insert EXCEPT0) ;92.10.26 by T.Saneto
- (let ((not_min 256)
- (not_max -1)
- (ch 0)
- (mode (car nexts)))
- (while (< ch 256)
- (cond((null (aref *regexp-case-table* ch))
- (if (< ch not_min) (setq not_min ch))
- (if (< not_max ch) (setq not_max ch))))
- (TREX-inc ch))
- (if (<= (- not_max not_min) (- max min))
- (setq min not_min
- max not_max
- mode nil))
- (let ((minb (/ min 8))
- (maxb (1+ (/ max 8))))
- (insert (if mode CHARSET_M CHARSET_M_NOT) minb (- maxb minb))
- (let ((b minb))
- (while (< b maxb)
- (let ((i 7) (bits 0))
- (while (<= 0 i)
- (if (eq (aref *regexp-case-table* (+ (* 8 b) i))
- mode)
- ;;;; bits table$B$N=g=x$O<!$NDL$j(B
- (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i)))
- (TREX-dec i))
- (insert bits))
- (TREX-inc b))))))
- (regexp-code-gen* (car nexts)))
- (t
- ;;; case
- (let ((point nil))
- (insert CASEN)
- (insert (length nexts))
- (setq point (point))
- (let ((list nexts))
- (while list
- (insert 0 0)
- (setq list (cdr list))))
- (insert min max)
- (let ((ch min))
- (while (<= ch max)
- (if (aref *regexp-case-table* ch)
- (insert (1+ (TREX-find (aref *regexp-case-table* ch) nexts)))
- (insert 0))
- (TREX-inc ch)))
- (let ((list nexts))
- (while list
- (if (null (assoc (car list) *labels*))
- (regexp-code-gen* (car list)))
- (setq list (cdr list))))
- (save-excursion
- (goto-char point)
- (let ((list nexts))
- (while list
- (delete-char 2)
- (let ((disp (- (cdr (assoc (car list) *labels*)) (+ (point) 2))))
- (insert (logand disp 255)
- (/ (logand disp (* 255 256)) 256)))
- (setq list (cdr list)))))
- ))))
- ((eq (car (car alist)) ':epsilon)
- (regexp-code-gen* (cdr (car alist))))
- (t
- (let ((key (car (car alist)))
- (next (cdr (car alist))))
- (cond ((symbolp key)
- (insert (eval key)))
- ((TREX-memequal (car key) '(CHARSET CHARSET_NOT))
- (let ((charset (cdr key))
- (min 128) (max -1)
- (mcbytes 0)
- (mcchars nil))
- (let ((i 0))
- (while (< i 256)
- (aset *regexp-charset-table* i nil)
- (TREX-inc i)))
- (while charset
- (cond((stringp (car charset))
- (cond((eq (length (car charset)) 1)
- (aset *regexp-charset-table* (aref (car charset) 0) t)
- (if (< (aref (car charset) 0) min)
- (setq min (aref (car charset) 0)))
- (if (< max (aref (car charset) 0))
- (setq max (aref (car charset) 0)))
- )
- (t
- (TREX-inc mcbytes (* 2 (length (car charset))))
- (if (null mcchars) (setq mcchars charset))
- )))
- ((consp (car charset)) ;;; range
- (cond ((eq (length (nth 1 (car charset))) 1)
- (let ((from (aref (nth 1 (car charset)) 0))
- (to (aref (nth 2 (car charset)) 0)))
- (if (< from min) (setq min from))
- (if (< max to) (setq max to))
- (while (<= from to)
- (aset *regexp-charset-table* from t)
- (TREX-inc from)))
- )
- (t
- (TREX-inc mcbytes
- (+ (length (nth 1 (car charset))) (length (nth 2 (car charset)))))
- (if (null mcchars) (setq mcchars charset))))))
- (setq charset (cdr charset)))
- (cond ((< max min)
- (insert (if (eq (car key) 'CHARSET) CHARSET CHARSET_NOT)
- (if (< 0 mcbytes) 128 0)))
- (t
- (let ((minb (/ min 8))
- (maxb (1+ (/ max 8))))
- (insert (if (eq (car key) 'CHARSET) CHARSET_M CHARSET_M_NOT)
- minb (+ (if (< 0 mcbytes) 128 0) (- maxb minb)))
- (let ((b minb))
- (while (< b maxb)
- (let ((i 7) (bits 0))
- (while (<= 0 i)
- (if (aref *regexp-charset-table* (+ (* 8 b) i))
- ;;;; bits table$B$N=g=x$O<!$NDL$j(B
- (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i)))
- (TREX-dec i))
- (insert bits))
- (TREX-inc b))))))
-
- (cond( (< 0 mcbytes)
- (TREX-inc mcbytes 2)
- (insert (/ mcbytes 256) (mod mcbytes 256))
- (while mcchars
- (cond((stringp (car mcchars))
- (insert (car mcchars) (car mcchars)))
- ((consp (car mcchars))
- (insert (nth 1 (car mcchars)) (nth 2 (car mcchars)))))
- (setq mcchars (cdr mcchars)))))
- ))
- ((= (length key) 1)
- (insert (eval (car key))))
- ((= (length key) 2)
- (insert (eval (car key)) (nth 1 key)))
- ((= (length key) 3)
- (insert (eval (car key)) (nth 1 key) (nth 2 key)))
- (t
- (regexp-error)))
- (regexp-code-gen* next))))
- (if point
- (let ((disp (- (point) point)))
- (save-excursion
- (goto-char point)
- (delete-char -2)
- (insert (logand disp 255)
- (/ (logand disp (* 255 256)) 256)))
- (regexp-code-gen-alist nextalist))))))
-
- (defun regexp-code-gen-exact (chars node)
- (let ((alist (cdr (assoc node *table*))))
- (cond((and (null (assoc node *labels*))
- (= (length alist) 1)
- (numberp (car (car alist))))
- (regexp-code-gen-exact (cons (car (car alist)) chars)
- (cdr (car alist))))
- (t
- (regexp-code-gen-exact* (reverse chars))
- (regexp-code-gen* node)))))
-
- (defun regexp-code-gen-exact* (chars)
- (cond((= (length chars) 1)
- (insert EXACT1 (car chars)))
- ((= (length chars) 2)
- (insert EXACT2 (car chars) (nth 1 chars)))
- ((= (length chars) 3)
- (insert EXACT3 (car chars) (nth 1 chars) (nth 2 chars)))
- (t
- (insert EXACTN (length chars))
- (let ((list chars))
- (while list
- (insert (car list))
- (setq list (cdr list)))))))
-
- ;;;
- ;;; regexp-code-dump
- ;;; $B@55,I=8=$N%3!<%I$rI=<($9$k!%(B
- ;;;
-
- (defvar *regexp-code-dump* nil)
- (defvar *regexp-code-index* nil)
-
- (defun regexp-code-dump (*regexp-code-dump*)
- (terpri)
- (let ((*regexp-code-index* 0)
- (max (length *regexp-code-dump*)))
- (while (< *regexp-code-index* max)
- (princ (format "%4d:" *regexp-code-index*))
- (let((op (aref *regexp-code-dump* *regexp-code-index*)))
- (cond((= op UNUSED) (regexp-code-dump-0 "unused"))
- ((= op EXACTN)
- (princ (format "exactn(%d) " (aref *regexp-code-dump* (1+ *regexp-code-index*))))
- (let ((j (+ *regexp-code-index* 2))
- (max (+ *regexp-code-index* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*)))))
- (while (< j max)
- (princ (format "%c" (aref *regexp-code-dump* j)))
- (TREX-inc j))
- (setq *regexp-code-index* j))
- (terpri)
- )
- ((= op BEGLINE) (regexp-code-dump-0 "begline"))
- ((= op ENDLINE) (regexp-code-dump-0 "endline"))
- ((= op JUMP) (regexp-code-dump-jump "jump"))
- ((and (= regexp-version 19)
- (= op JUMP_PAST_ALT))
- (regexp-code-dump-jump "jump_past_alt"))
- ((= op ON_FAILURE_JUMP ) (regexp-code-dump-jump "on_failure_jump"))
- ((and (= regexp-version 19)
- (= op ON_FAILURE_KEEP_STRING_JUMP))
- (regexp-code-dump-jump "on_failure_keep_string_jump"))
- ((and (= regexp-version 18)
- (= op FINALIZE_JUMP))
- (regexp-code-dump-jump "finalize_jump"))
- ((and (= regexp-version 18)
- (= op MAYBE_FINALIZE_JUMP))
- (regexp-code-dump-jump "maybe_finalize_jump"))
- ((and (= regexp-version 19)
- (= op POP_FAILURE_JUMP))
- (regexp-code-dump-jump "pop_failure_jump"))
- ((and (= regexp-version 19)
- (= op MAYBE_POP_JUMP))
- (regexp-code-dump-jump "maybe_pop_jump"))
- ((= op DUMMY_FAILURE_JUMP) (regexp-code-dump-jump "dummy_failure_jump"))
- ((and (= regexp-version 19)
- (= op PUSH_DUMMY_FAILURE))
- (regexp-code-dump-0 "push_dummy_failure"))
- ((and (= regexp-version 19)
- (= op SUCCEED_N))
- (regexp-code-dump-jump-2 "succeed_n"))
- ((and (= regexp-version 19)
- (= op JUMP_N))
- (regexp-code-dump-jump-2 "jump_n"))
- ((and (= regexp-version 19)
- (= op SET_NUMBER_AT))
- (regexp-code-dump-jump-2 "SET_NUMBER_AT"))
- ((= op ANYCHAR) (regexp-code-dump-0 "anychar"))
- ((= op CHARSET) (regexp-code-dump-charset "charset"))
- ((= op CHARSET_NOT) (regexp-code-dump-charset "charset_not"))
- ((= op START_MEMORY)
- (if (= regexp-version 19)
- (regexp-code-dump-2 "start_memory")
- (regexp-code-dump-1 "start_memory")))
- ((= op STOP_MEMORY)
- (if (= regexp-version 19)
- (regexp-code-dump-2 "stop_memory")
- (regexp-code-dump-1 "stop_memory")))
- ((= op DUPLICATE) (regexp-code-dump-1 "duplicate"))
- ((= op BEFORE_DOT) (regexp-code-dump-0 "before_dot"))
- ((= op AT_DOT) (regexp-code-dump-0 "at_dot"))
- ((= op AFTER_DOT) (regexp-code-dump-0 "after_dot"))
- ((= op BEGBUF) (regexp-code-dump-0 "begbuf"))
- ((= op ENDBUF) (regexp-code-dump-0 "endbuf"))
- ((= op WORDCHAR) (regexp-code-dump-0 "wordchar"))
- ((= op NOTWORDCHAR) (regexp-code-dump-0 "notwordchar"))
- ((= op WORDBEG) (regexp-code-dump-0 "wordbeg"))
- ((= op WORDEND) (regexp-code-dump-0 "wordend"))
- ((= op WORDBOUND) (regexp-code-dump-0 "wordbound"))
- ((= op NOTWORDBOUND) (regexp-code-dump-0 "notwordbound"))
- ((= op SYNTAXSPEC) (regexp-code-dump-syntax "syntaxspec"))
- ((= op NOTSYNTAXSPEC) (regexp-code-dump-syntax "notsyntaxspec"))
- ((= op EXACT1) (regexp-code-dump-1ch "EXACT1"))
- ((= op EXACT2)
- (princ (format "EXACT2 %c%c\n" (aref *regexp-code-dump* (1+ *regexp-code-index*))
- (aref *regexp-code-dump* (+ *regexp-code-index* 2))))
- (TREX-inc *regexp-code-index* 3))
- ((= op EXACT3)
- (princ (format "EXACT3 %c%c%c\n"
- (aref *regexp-code-dump* (1+ *regexp-code-index*))
- (aref *regexp-code-dump* (+ *regexp-code-index* 2))
- (aref *regexp-code-dump* (+ *regexp-code-index* 3))))
- (TREX-inc *regexp-code-index* 4))
- ((= op CHARSET_M) (regexp-code-dump-charset-m "CHARSET_M"))
- ((= op CHARSET_M_NOT) (regexp-code-dump-charset-m "CHARSET_M_NOT"))
- ((= op CASEN)
- (princ (format "CASEN %d\n" (aref *regexp-code-dump* (1+ *regexp-code-index*))))
- (let ((j (+ *regexp-code-index* 2))
- (max (+ *regexp-code-index* 2 (* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*))))))
- (while (< j max)
- (princ (format "[%d]::%d\n" (1+ (/ (- j (+ *regexp-code-index* 2)) 2))
- (regexp-get-absolute-address
- (+ j 2) (aref *regexp-code-dump* j)
- (aref *regexp-code-dump* (1+ j)))))
- (TREX-inc j 2))
- (let ((ch (aref *regexp-code-dump* j)) (chmax (aref *regexp-code-dump* (1+ j))))
- (princ (format "%c::%c\n" ch chmax))
- (TREX-inc j 2)
- (while (<= ch chmax)
- (princ (format "%c=>[%d]\n" ch (aref *regexp-code-dump* j)))
- (TREX-inc j)
- (TREX-inc ch)))
- (setq *regexp-code-index* j)))
- ((= op ON_FAILURE_SUCCESS) (regexp-code-dump-0 "ON_FAILURE_SUCCESS"))
- ((= op SUCCESS) (regexp-code-dump-0 "SUCCESS"))
- ((= op POP) (regexp-code-dump-0 "POP"))
- ((= op EXCEPT0) (regexp-code-dump-0 "EXCEPT0"))
- ((= op EXCEPT1) (regexp-code-dump-1ch "EXCEPT1"))
- ((= op CATEGORYSPEC) (regexp-code-dump-1ch "CATEGORYSPEC"))
- ((= op NOTCATEGORYSPEC) (regexp-code-dump-1ch "NOTCATEGORYSPEC"))
- (t (princ (format "unknown op=%d\n" op))
- (TREX-inc *regexp-code-index*)))))
- (princ (format "%4d:\n" *regexp-code-index*)))
- nil
- )
-
- (defun regexp-code-dump-0 (op)
- (princ op) (terpri)
- (TREX-inc *regexp-code-index*))
-
- (defun regexp-code-dump-1 (op)
- (princ (format "%s %d\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*))))
- (TREX-inc *regexp-code-index* 2))
-
- (defun regexp-code-dump-2 (op)
- (princ (format "%s %d %d\n"
- op
- (aref *regexp-code-dump* (1+ *regexp-code-index*))
- (aref *regexp-code-dump* (+ *regexp-code-index* 2))
- ))
- (TREX-inc *regexp-code-index* 3))
-
- (defun regexp-code-dump-syntax (op)
- (princ (format "%s %c\n" op (syntax-code-spec (aref *regexp-code-dump* (1+ *regexp-code-index*)))))
- (TREX-inc *regexp-code-index* 2))
-
- (defun regexp-code-dump-1ch (op)
- (princ (format "%s %c\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*))))
- (TREX-inc *regexp-code-index* 2))
-
- (defun regexp-get-absolute-address (point b1 b2)
- (cond ((< b2 128)
- (+ point (+ (* 256 b2) b1)))
- (t
- (+ point (logior (logxor -1 (+ (* 255 256) 255)) (* 256 b2) b1)))))
-
- (defun regexp-code-dump-jump (op)
- (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*)))
- (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))
- (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2)))
- (princ (format "%s %d\n" op p)))
- (TREX-inc *regexp-code-index* 3))
-
- (defun regexp-code-dump-jump-2 (op)
- (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*)))
- (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))
- (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2)))
- (princ (format "%s %d %d\n" op p
- (+
- (* 256 (aref *regexp-code-dump* (+ *regexp-code-index* 3)))
- (aref *regexp-code-dump* (+ *regexp-code-index* 4))))))
- (TREX-inc *regexp-code-index* 5))
-
- (defun regexp-code-dump-charset (op)
- (let ((n (aref *regexp-code-dump* (1+ *regexp-code-index*))))
- (princ (format "%s %d " op n))
- (let ((j (+ *regexp-code-index* 2))
- (max (+ *regexp-code-index* 2 (if (<= 128 n) (- n 128) n))))
- (while (< j max)
- (princ (format "0x%2x " (aref *regexp-code-dump* j)))
- (TREX-inc j))
- (cond((<= 128 n)
- (let* ((len (+ (* 256 (aref *regexp-code-dump* j))
- (aref *regexp-code-dump* (1+ j))))
- (last (+ j len)))
- (princ (format "\n range list[%d-2 bytes]" len))
- (TREX-inc j 2)
- (while (< j last)
- (let ((ch (sref *regexp-code-dump* j)))
- (princ (format " %c" ch))
- (TREX-inc j (char-octets ch))
- (setq ch (sref *regexp-code-dump* j))
- (princ (format "-%c" ch))
- (TREX-inc j (char-octets ch))))
- )))
- (setq *regexp-code-index* j)
- (terpri))
- ))
-
- (defun regexp-code-dump-charset-m (op)
- (let ((m (aref *regexp-code-dump* (1+ *regexp-code-index*)))
- (n (aref *regexp-code-dump* (+ *regexp-code-index* 2))))
- (princ (format "%s %d %d " op m n))
- (let ((j (+ *regexp-code-index* 3))
- (max (+ *regexp-code-index* 3 (if (<= 128 n) (- n 128) n))))
- (while (< j max)
- (princ (format "0x%02x " (aref *regexp-code-dump* j)))
- (TREX-inc j))
- (cond((<= 128 n)
- (let* ((len (+ (* 256 (aref *regexp-code-dump* j))
- (aref *regexp-code-dump* (1+ j))))
- (last (+ j len)))
- (princ (format "\n range list[%d-2 bytes]" len))
- (TREX-inc j 2)
- (while (< j last)
- (let ((ch (sref *regexp-code-dump* j)))
- (princ (format " %c" ch))
- (TREX-inc j (char-octets ch))
- (setq ch (sref *regexp-code-dump* j))
- (princ (format "-%c" ch))
- (TREX-inc j (char-octets ch))))
- )))
- (setq *regexp-code-index* j)
- (terpri)
- )))
-
- ;;;
- ;;; Compile functions
- ;;;
-
- (defun TREX-simple-test1 ()
- (regexp-word-compile
- "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\sw+"))
-
- (defun TREX-test1 (pattern)
- (let* ((regexp (regexp-parse pattern))
- (fFA (EFFA-make (FA-make regexp)))
- (bFA (EFFA-make (FA-inverse fFA)))
- (l (cdr fFA))
- (result nil))
- (TREX-push (cons (DFA-optimize (DFA-make fFA))
- (DFA-optimize (DFA-make bFA)))
- result)
- (while l
- (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA)))))
- (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA))))))
- (cond((and forward backward)
- (TREX-push (cons (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward))))
- (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward)))))
- result))))
- (setq l (cdr l)))
- (setq result (reverse result))
- (let ((count 0))
- (while result
- (princ (format "\nForward[%2d]:" count)) (FA-dump (car (car result)))
- (princ (format "\nBackward[%2d]:" count)) (FA-dump (cdr (car result)))
- (TREX-inc count)
- (setq result (cdr result))))))
-
- (defun TREX-test2 (pattern)
- (let* ((regexp (regexp-parse pattern))
- (fFA (EFFA-make (FA-make regexp)))
- (l (cdr fFA))
- (result nil))
- (regexp-code-dump (setq result (regexp-code-gen (DFA-optimize (DFA-make fFA)))))
- result))
-
- ;;;###autoload
- (defun regexp-compile (pattern)
- (regexp-compile-internal pattern nil))
-
- ;;;###autoload
- (defun regexp-word-compile (pattern)
- (regexp-compile-internal pattern t))
-
- ;;;
- ;;; Returns a list of pair of forward-code and backward-code
- ;;;
-
-
- (defun regexp-compile-internal (pattern &optional word)
- (let* ((*regexp-word-definition* word)
- (*regexp-parse-translate*
- (if case-fold-search
- ;;; DOWNCASE or CANONICAL?
- (nth 2 (current-case-table))
- nil))
- (regexp (regexp-parse pattern))
- (fFA (EFFA-make (FA-make (regexp-reform-duplication regexp))))
- (bFA (EFFA-make (FA-make (regexp-reform-duplication (regexp-inverse regexp)))))
- (result nil))
- (let ((ofFA (DFA-optimize (DFA-make fFA)))
- (obFA (DFA-optimize (DFA-make bFA))))
- (TREX-push (cons (DFA-code-with-fastmap ofFA)
- (let* ((START_MEMORY STOP_MEMORY)
- (STOP_MEMORY START_MEMORY))
- (DFA-code-with-fastmap obFA)))
- result))
- (if word
- (let ((l (cdr fFA))
- (bFA (EFFA-make (FA-inverse fFA))))
- (while l
- (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA)))))
- (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA))))))
- (cond((and forward backward)
- (let ((fFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward)))))
- (bFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward))))))
- (TREX-push (cons (DFA-code-with-fastmap fFA)
- (DFA-code-with-fastmap bFA))
- result)))))
- (setq l (cdr l)))
- (setq result (nreverse result))))
- result))
-
- (defun regexp-compiled-pattern-dump (pattern)
- ;;; PATTERN is a vector of [ code fastmap fastmap-syntax fastmap-categoy]
- (regexp-code-dump (aref pattern 0))
- (print-fastmap (aref pattern 1) " fastmap[char]")
- (print-fastmap (aref pattern 2) " fastmap[synt]")
- (print-fastmap (aref pattern 3) " fastmap[cate]")
- )
-
- (defun regexp-compile-dump (code)
- (let ((Fcode (aref (car (car code)) 0))
- (Bcode (aref (cdr (car code)) 0))
- (words (cdr code)))
- (princ (format "\nRegular Expression Compiler Dump:\n"))
- (princ (format "Forward Search:"))
- (regexp-compiled-pattern-dump (car (car code)))
- (princ (format "Backward Search:"))
- (if Bcode (regexp-compiled-pattern-dump (cdr (car code)))
- (princ (format "\n Use the interpreter\n")))
- (if words
- (let ((i 1))
- (princ (format "In word conditions:\n"))
- (while words
- (princ (format "Forward[%d]" i))
- (regexp-compiled-pattern-dump (car (car words)))
- (princ (format "Backward[%d]" i))
- (regexp-compiled-pattern-dump (cdr (car words)))
- (TREX-inc i)
- (setq words (cdr words)))))))
-
- (defun regexp-compile-and-dump (regexp)
- (regexp-compile-dump (regexp-compile regexp)))
-
-
- ;;;###autoload
- (defmacro define-word-regexp (name regexp)
- (` (defconst (, name) '(, (regexp-word-compile regexp)))))
-
- (put 'define-word-regexp 'byte-hunk-handler ;93.7.16 by S.Tomura
- 'macroexpand)
-
- ;;;
- ;;; Coding system
- ;;;
-
- (defmacro define-coding-systems (&rest rest)
- (` (define-coding-systems* '(, rest))))
-
- (defun define-coding-systems* (names)
- (let ((systems
- (` (:or (,@ (mapcar (function (lambda (name) (` (:seq (, (regexp-get-definition name))
- (, name)))))
- names))))))
- systems))
-
- (defun oct (str) (aref str 0))
-
- (defvar *TREX-range-from* nil)
- (defvar *TREX-range-to* nil)
-
- (defun TREX-range-make-jisjoint (regexp)
- (TREX-init *TREX-range-from* (make-vector 256 nil))
- (TREX-init *TREX-range-to* (make-vector 256 nil))
- (let ((i 0))
- (while (< i 256)
- (aset *TREX-range-from* i nil)
- (aset *TREX-range-to* i nil)
- (TREX-inc i)))
- (aset *TREX-range-from* 0 t)
- (aset *TREX-range-to* 255 t)
- (TREX-range-mark regexp)
- (TREX-range-replace regexp))
-
- (defun TREX-range-mark (regexp)
- (cond
- ((consp regexp)
- (let ((op (car regexp)))
- (cond((eq op ':mark)
- (TREX-range-mark (nth 3 regexp)))
- ((eq op ':or)
- (mapcar 'TREX-range-mark (cdr regexp)))
- ((eq op ':seq)
- (mapcar 'TREX-range-mark (cdr regexp)))
- ((eq op ':optional)
- (TREX-range-mark (nth 1 regexp)))
- ((eq op ':star)
- (TREX-range-mark (nth 1 regexp)))
- ((eq op ':plus)
- (TREX-range-mark (nth 1 regexp)))
- ((eq op ':range)
- (TREX-range-mark2 (nth 1 regexp) (nth 2 regexp))))))
- ((stringp regexp)
- (TREX-range-mark2 regexp regexp))
- ((numberp regexp)
- (TREX-range-mark2 regexp regexp))))
-
- (defun TREX-range-mark2 (from to)
- (if (stringp from) (setq from (aref from 0)))
- (if (stringp to) (setq to (aref to 0)))
- (if (< 0 from) (aset *TREX-range-to* (1- from) t))
- (if (< to 255) (aset *TREX-range-from* (1+ to) t))
- (aset *TREX-range-from* from t)
- (aset *TREX-range-to* to t))
-
- (defun TREX-range-replace (regexp)
- (cond
- ((consp regexp)
- (let ((op (car regexp)))
- (cond((eq op ':mark)
- (` (:mark (, (nth 1 regexp))
- (, (nth 2 regexp))
- (, (TREX-range-replace (nth 3 regexp))))))
- ((eq op ':or)
- (` (:or (,@ (mapcar 'TREX-range-replace (cdr regexp))))))
- ((eq op ':seq)
- (` (:seq (,@ (mapcar 'TREX-range-replace (cdr regexp))))))
- ((eq op ':optional)
- (` (:optional (,(TREX-range-replace (nth 1 regexp))))))
- ((eq op ':star)
- (` (:star (,(TREX-range-replace (nth 1 regexp))))))
- ((eq op ':plus)
- (` (:plus (,(TREX-range-replace (nth 1 regexp))))))
- ((eq op ':range)
- (let ((from (nth 1 regexp))
- (to (nth 2 regexp))
- i j
- (result nil))
- (if (stringp from) (setq from (aref from 0)))
- (if (stringp to ) (setq to (aref to 0)))
- (setq i from
- j from)
- (while (<= i to)
- (while (not (aref *TREX-range-to* j))
- (TREX-inc j))
- (if (not (= i j)) (TREX-push (` (:range (, i) (, j))) result)
- (TREX-push i result))
- (TREX-inc j)
- (setq i j))
- (if (= (length result) 1) (car result)
- (` (:or (,@ (nreverse result))))))))))
- ((stringp regexp)
- (if (= (length regexp) 1)
- (aref regexp 0)
- regexp))
- ((numberp regexp)
- regexp)
- (t regexp)))
-
- (defun FA-sort (FA)
- (let ((start (car FA))
- (alist (cdr FA)))
- (setq alist (sort alist 'TREX-lessp-car))
- (while alist
- (setcdr (car alist) (sort (cdr (car alist)) 'TREX-lessp-car))
- (setcdr (car alist ) (TREX-sort (cdr (car alist)) 'TREX-key-lessp 'cdr))
- (setq alist (cdr alist)))
- FA))
-
- ;;;
- ;;; CHARSET functions:
- ;;;
- ;;; CHARSET ::= RANGE |
- ;;; (:or RANGE+) |
- ;;; (:nor RANGE+)
- ;;; RANGE+ ::= CHAR |
- ;;; (:range CHAR CHAR)
- ;;;
-
- (defun CHARSET-rangep (charset)
- (or (numberp charset)
- (and (consp charset) (eq (car charset) ':range))))
-
- (defun CHARSET-orp (charset)
- (and (consp charset) (eq (car charset) ':or)))
-
- (defun CHARSET-range-from (range)
- (if (numberp range) range
- (nth 1 range)))
-
- (defun CHARSET-range-to (range)
- (if (numberp range) range
- (nth 2 range)))
-
- (defun CHARSET-range-make (from to)
- (if (= from to) from
- (list ':range from to)))
-
- (defun CHARSET-membership (range charset)
- (let ((from (CHARSET-range-from range))
- (to (CHARSET-range-to range))
- (flag nil))
- (while (and charset flag1)
- (if (< from (CHARSET-range-from (car charset)))
- (setq charset (cdr charset))
- (setq flag t)))
- (and flag1 (<= to (CHARSET-range-to (car charset))))))
-
- (defun CHARSET-not (charset)
- (cond((CHARSET-rangep charset)
- (list ':nor charset))
- ((CHARSET-orp charset)
- (cons ':nor (cdr charset)))
- (t
- (cons ':or (cdr charset)))))
-
- (defun CHARSET-union (charset1 charset2)
- (cond((CHARSET-rangep charset1)
- (cond ((CHARSET-rangep charset2)
- (CHARSET-union-range-range charset1 charset2))
- ((CHARSET-orp charset2)
- (CHARSET-union-range-or charset1 charset2))
- (t
- (CHARSET-union-range-nor charset1 charset2))))
- ((CHARSET-orp charset1)
- (cond ((CHARSET-rangep charset2)
- (CHARSET-union-range-or charset2 charset1))
- ((CHARSET-orp charset2)
- (CHARSET-union-or-or charset1 charset2))
- (t
- (CHARSET-union-or-nor charset1 charset2))))
- (t ;;; (CHARSET-norp charset1)
- (cond((CHARSET-rangep charset2)
- (CHARSET-union-range-nor charset2 charset1))
- ((CHARSET-orp charset2)
- (CHARSET-union-or-nor charset2 charset1))
- (t
- (CHARSET-union-nor-nor charset1 charset2))))))
-
- (defun CHARSET-union-range-range (range1 range2)
- (let ((from1 (CHARSET-range-from range1))
- (to1 (CHARSET-range-to range1))
- (from2 (CHARSET-range-from range2))
- (to2 (CHARSET-range-to range2)))
- (cond((< to1 from2)
- (list ':or range1 range2))
- (t ;;; (<= from2 (1+ to1))
- (cond((<= to1 to2) ;;; (<= from2 to1 to2)
- (CHARSET-range-make (min from1 from2) to2))
- ((<= from1 to2) ;;; (<= from1 to2 to1)
- (CHARSET-range-make (min from1 from2) to1))
- (t ;;; (<= to2 from1 to1)
- (list ':or range2 range1)))))))
-
- (defun CHARSET-union-range-or (range or)
- (cons ':or (CHARSET-union-range-or* range (cdr or))))
-
- (defun CHARSET-union-range-or* (range or-body)
- (let ((from (CHARSET-range-from range))
- (to (CHARSET-range-to range))
- (part1 nil))
- (let ((flag nil))
- (while (and or-body (null flag))
- (let ((next (car or-body)))
- (if (< (CHARSET-range-from next) from)
- ;;; from[i] < from
- (if (< (CHARSET-range-to next) from)
- ;;; to[i] < from
- (setq part1 (cons next part1)
- or-body (cdr or-body))
- ;;; from[i] < from <= to[i]
- (setq from (CHARSET-range-from next)
- flag t))
- ;;; from <= from[1]
- ;;; to[i-1] < from <= from[i]
- (setq flag t)))))
- ;;; part1 < from <= from[i]
- (if (and part1 (<= (1+ (CHARSET-range-to (car part1))) from))
- (setq from (CHARSET-range-from (car part1))
- part1 (cdr part1)))
- ;;; part1 << from <= from[i]
- (let ((flag nil))
- (while (and or-body (null flag))
- (let ((next (car or-body)))
- (if (< (CHARSET-range-from next) to)
- ;;; from[j] < from
- (if (< (CHARSET-range-to next) to)
- ;;; to[j] < to
- (setq or-body (cdr or-body))
- ;;; from[j] < to <= to[j]
- (setq to (CHARSET-range-to next)
- flag t))
- ;;; to <= from[1]
- ;;; to[j-1] < to <= from[j]
- (setq flag t)))))
- ;;; part2 < to <= from[j]
- (if (and or-body (<= (CHARSET-range-from (car or-body)) (1+ to)))
- (setq to (CHARSET-range-to (car or-body))
- or-body (cdr or-body)))
- ;;; part2 <= to << from[j]
- (nconc (reverse part1)
- (cons (CHARSET-range-make from to)
- or-body))))
-
-
- (defun CHARSET-union-range-nor (range nor)
- (let ((from (CHARSET-range-from range))
- (to (CHARSET-range-to range))
- (nor-body (cdr nor)))
-
- ))
-
- (defun CHARSET-union-or-or (or1 or2)
- (cons ':or (CHARSET-union-or*-or* (cdr or1) (cdr or2))))
-
- (defun CHARSET-union-or*-or* (or1-body or2-body)
- (let ((result-body or2-body))
- (while or1-body
- (setq result-body
- (CHARSET-union-range-or* (car or1-body) result-body))
- (setq or1-body (cdr or1-body)))
- result-body))
-
- (defun CHARSET-union-or-nor (or nor)
- )
-
- (defun CHARSET-union-nor-nor (nor1 nor2)
- (cons ':nor (CHARSET-intersection-or*-or* (cdr nor1) (cdr nor2))))
-
- (defun CHARSET-intersection (charset1 charset2)
- (cond((CHARSET-rangep charset1)
- (cond ((CHARSET-rangep charset2)
- (CHARSET-intersection-range-range charset1 charset2))
- ((CHARSET-orp charset2)
- (CHARSET-intersection-range-or charset1 charset2))
- (t
- (CHARSET-intersection-range-nor charset1 charset2))))
- ((CHARSET-orp charset1)
- (cond ((CHARSET-rangep charset2)
- (CHARSET-intersection-range-or charset2 charset1))
- ((CHARSET-orp charset2)
- (CHARSET-intersection-or-or charset1 charset2))
- (t
- (CHARSET-intersection-or-nor charset1 charset2))))
- (t ;;; (CHARSET-norp charset1)
- (cond((CHARSET-rangep charset2)
- (CHARSET-intersection-range-nor charset2 charset1))
- ((CHARSET-orp charset2)
- (CHARSET-intersection-or-nor charset2 charset1))
- (t
- (CHARSET-intersection-nor-nor charset1 charset2))))))
-
- (defun CHARSET-intersection-range-or (range or)
- (CHARSET-intersection-range-or* range (cdr or)))
-
- (defun CHARSET-intersection-range-or* (range or-body)
- (let ((from (CHARSET-range-from range))
- (to (CHARSET-range-to range))
- (part2 nil))
- (let ((flag nil))
- (while (and or-body (null flag))
- (let ((next (car or-body)))
- (if (< (CHARSET-range-from next) from)
- ;;; from[i] < from
- (if (< (CHARSET-range-to next) from)
- ;;; to[i] < from
- (setq or-body (cdr or-body))
- ;;; from[i] < from <= to[i]
- (setq flag t))
- ;;; from <= from[1]
- ;;; to[i-1] < from <= from[i]
- (setq flag t)))))
- ;;; from[i] < from <= to[i]
- ;;; from <= from[1]
- ;;; to[i-1] < from <= from[i]
- (let ((flag nil))
- (while (and or-body (null flag))
- (let ((next (car or-body)))
- (if (<= (CHARSET-range-from next) to)
- ;;; from[j] <= to
- (if (<= (CHARSET-range-to next) to)
- ;;; to[j] <= to
- (setq part2 (cons next part2)
- or-body (cdr or-body))
- ;;; from[j] <= to < to[j]
- (setq part2 (cons next part2)
- or-body (cdr or-body)
- flag t)
- ;;; to < from[1]
- ;;; to[j-1] <= to < from[j]
- (setq flag t)))))
- ;;; from[j] <= to < to[j]
- ;;; to < from[1]
- ;;; to[j-1] <= to < from[j]
- (cond ((null part2) nil)
- ((= (length part2) 1)
- (list (CHARSET-range-make (max from (CHARSET-range-from (car part2)))
- (min to (CHARSET-range-to (car part2))))))
- (t
- (setcar part2 (CHARSET-range-make (CHARSET-range-from (car part2))
- (min to (CHARSET-range-to (car part2)))))
- (setq part2 (nreverse part2))
- (setcar part2 (CHARSET-range-make (max from (CHARSET-range-from (car part2)))
- (CHARSET-range-to (car part2))))
- part2)))))
-
- (defun CHARSET-intersection-range-nor (range nor)
- (CHARSET-intersection-range-nor* range (cdr nor)))
-
- (defun CHARSET-intersecion-range-nor* (range nor-body)
- (let ((from (CHARSET-range-from range))
- (to (CHARSET-range-to range)))
- ))
-
- ;;; (and (or a b) c) == (or (and a c) (and b c))
-
- (defun CHARSET-intersection-or-or (or1 or2)
- (let ((result nil)
- (or1-body (cdr or1))
- (or2-body (cdr or2)))
- (while or1-body
- (setq result (CHARSET-union-or*-or*
- (CHARSET-intersection-range-or* (car or1-body) or2-body)
- result))
- (setq or1-body (cdr or1-body)))
- (if (= (length result) 1) (car result)
- (cons ':or result))))
-
- (defun CHARSET-intersection-or-nor (or nor)
- )
-
- ;;; (and (not or1) (not or2)) == (not (or or1 or2))
-
- (defun CHARSET-intersection-nor-nor (nor1 nor2)
- (cons ':nor (CHARSET-union-or*-or* (cdr nor1) (cdr nor2))))
-
- (defun FA-compaction (FA)
- (let ((start (car FA))
- (alist (cdr FA)))
- (setq alist (TREX-sort alist 'TREX-key-lessp 'car))
- (while alist
- (let ((table (cdr (car alist)))
- (newtable nil)
- (keys nil) (next nil))
- (setq table (TREX-sort table '< 'car))
- (while table
- (setq next (cdr (car table)))
- (TREX-push (car (car table)) keys)
- (setq table (cdr table))
- (while (and table (eq next (cdr (car table))))
- (TREX-push (car (car table)) keys)
- (setq table (cdr table)))
- (setq keys (reverse (sort keys 'TREX-key-lessp)))
- (let ((newkeys nil))
- (setq newkeys (car keys)
- keys (cdr keys))
- (while keys
- (cond((numberp (car keys))
- (cond((numberp (car newkeys))
- (if (= (1+ (car keys)) (car newkeys))
- (setcar newkeys (list ':range (car keys) (car newkeys)))
- (TREX-push (car keys) newkeys)))
- ((and (consp (car newkeys)) (eq (car (car newkeys)) ':range)))))))))))))
-
-
-
- (defun FA-dump2 (table)
- (let ((start (car table))
- (l (cdr table)))
- (princ (format "\nstart = %d\n" start))
- (while l
- (princ (format "%3d: " (car (car l))))
- (let ((alist (cdr (car l))))
- (cond ((numberp (car (car alist)))
- (princ (format "\\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist)))))
- ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
- (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
- ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range))
- (princ (format "(:range \\%03o \\%03o) -> %s\n"
- (nth 1 (car (car alist))) (nth 2 (car (car alist)))
- (cdr (car alist)))))
- (t
- (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist))))))
- (setq alist (cdr alist))
- (while alist
- (cond ((numberp (car (car alist)))
- (princ (format " \\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist)))))
- ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
- (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
- ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range))
- (princ (format " (:range \\%03o \\%03o) -> %s\n"
- (nth 1 (car (car alist))) (nth 2 (car (car alist)))
- (cdr (car alist)))))
- (t
- (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist))))))
- (setq alist (cdr alist))))
- (setq l (cdr l)))))
-
- ;;;function re-compile REGEXP
- ;;;Compile REGEXP by GNU Emacs original regexp compiler,
- ;;;and return information of the compiled code by a vector of length 11:
- ;;; [ COMPILED-PATTERN (string)
- ;;; RE-NSUB REGS-ALLOCATED CAN-BE-NULL NEWLINE-ANCHOR (integers)
- ;;; NO-SUB NOT-BOL NOT-EOL SYNTAX (integers)
- ;;; FASTMAP TRANSLATE (string) ].
- ;;;
-
- (defun print-compiled-pattern (compiled-code)
- (let ((compiled-pattern (aref compiled-code 0))
- (re-nsub (aref compiled-code 1))
- (regs-allocated (aref compiled-code 2))
- (can-be-null (aref compiled-code 3))
- (newline-anchor (aref compiled-code 4))
- (no-sub (aref compiled-code 5))
- (not-bol (aref compiled-code 6))
- (not-eol (aref compiled-code 7))
- (syntax (aref compiled-code 8))
- (fastmap (aref compiled-code 9))
- (translate (aref compiled-code 10)))
- (regexp-code-dump compiled-pattern)
- ;;; fastmap
- (if fastmap (print-fastmap fastmap "fastmap"))
- (princ (format "re_nsub: %d\n" re-nsub))
- (princ (format "regs-alloc: %d\n" regs-allocated))
- (princ (format "can-be-null: %d\n" can-be-null))
- (princ (format "newline-anchor: %d\n" newline-anchor))
- (princ (format "no-sub: %d\n" no-sub))
- (princ (format "not-bol: %d\n" not-bol))
- (princ (format "not-eol: %d\n" not-eol))
- (princ (format "syntax: %d\n" syntax))
- (if translate (print-translate translate))
- ;;; translate
- nil
- ))
-
- (defun print-fastmap (fastmap name)
- (if fastmap
- (progn
- (princ (format "%s:[" name))
- (let ((max (length fastmap))
- (i 0))
- (while (< i max)
- (if (not (= (aref fastmap i) 0))
- (princ (format "%c" i)))
- (setq i (1+ i))))
- (princ "]\n"))))
-
- (defun print-translate (trans)
- (if trans
- (progn
- (princ "translate:\n")
- (let ((max (length trans))
- (i 0))
- (while (< i max)
- (if (not (= (aref trans i) i))
- (princ (format " %c --> %c" i (aref trans i))))
- (setq i (1+ i))))
- (princ "\n"))))
-
- (defun re-compile-and-dump (regexp)
- (print-compiled-pattern (re-compile regexp)))
-
-
-
-
-
-
-